summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CCPP.class.php554
-rw-r--r--README.md15
2 files changed, 569 insertions, 0 deletions
diff --git a/CCPP.class.php b/CCPP.class.php
new file mode 100644
index 0000000..1532449
--- /dev/null
+++ b/CCPP.class.php
@@ -0,0 +1,554 @@
+<?php
+/**
+ * C Compatible PreProcessor for PHP
+ *
+ * LICENSE:
+ * Copyright (c) 2009, Marin Valeriev Ivanov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Marin Valeriev Ivanov ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Marin Valeriev Ivanov BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @author Marin Ivanov <[email protected]>
+ * @copyright 2009 Marin Ivanov
+ * @license BSD License http://www.metala.org/licenses/2009/bsd-license.txt
+ * @version 0.1 Beta 2
+ *
+ * References:
+ * #1 (ISO/IEC 9899:1990 TC2 WG14/N1124) http://www.open-std.org/JTC1/SC22/WG14/www/standards
+ * #2 Argumented Backus-Naur http://www.ietf.org/rfc/rfc2234.txt
+*/
+
+/**
+Not implemented:
+
+#Pragma
+function macros in function macros
+single hash (quoting string)
+compiled caching
+#if defined()
+*/
+
+// Version scheme in Argumented Backus-Naur Form (ABNF)[2]
+//
+// major = minor = build = *DIGIT
+// stage = (a | b | rc) *DIGIT
+// version = major "." minor ["." build | stage]
+
+define('CCPP_VERSION', '0.1b1');
+
+class CCPP
+{
+ //Parser options
+ public $options = array();
+
+ //Macros
+ public $macros = array(); //Object-like macros
+ public $functionMacros = array(); //Function-like macros
+
+ //Cache
+ protected $_cache_phpSnippets = array();
+ protected $_cache_regexps = array();
+ protected $_cache_formats = array();
+
+ //Constructor
+ public function __construct()
+ {
+ $this->options += array(
+ //Translation
+ 'translate.trigraphs' => true,
+ 'translate.lineSlicing' => true,
+ 'translate.comments' => true,
+
+ //Phase 4 engine
+ 'translate.p4Engine' => 'compiler',
+ 'translate.p4tokenizer' => 'internal',
+ //Phase 4
+ 'translate.directives' => true,
+ 'translate.compactWhitespaces' => false,
+ 'translate.macros' => true,
+ 'translate.functionMacros' => true,
+
+ //Protector
+
+ //Output
+ 'output.format' => 'evaluated',
+ );
+ $this->_prepareCache();
+ $this->_predefineMacros();
+ }
+
+ protected function _prepareCache()
+ {
+ //PHP Snippets
+ $sBOB = 'ob_start();';
+ $sEOB = 'ob_end_flush();';
+
+ $this->_cache_phpSnippets = array(
+ 'BOB' => &$sBOB,
+ 'EOB' => &$sEOB,
+ 'CFH' => "<?php\n".$sBOB."\n?>\n", //Compiled File Header
+ 'CFF' => "\n<?php\n".$sEOB."\n?>", //Compiled File Footer
+ 'POT' => "<?php ",
+ 'PCT' => " ?>",
+ );
+
+ //Regexps
+ $this->_cache_regexps = array(
+ 'defineMacroOperands' => '~^([a-z_][a-z0-9_]+(\\([a-z0-9,_ ]*\\))?)(\s+([^\r\n]+))?~i',
+ 'version' => '~^([0-9]{1,2})\.([0-9]{1,2})((\.([0-9]{1,2}))|((pa|a|b|rc)([0-9]?)))?~',
+ );
+
+ //Formats
+ $this->_cache_formats = array(
+ 'directive' => "#%s %[^\n]",
+ );
+
+ }
+
+ protected function _predefineMacros()
+ {
+ //Version
+ preg_match_all($this->_cache_regexps['version'], CCPP_VERSION, $matches);
+ $ccppVersion = array(
+ 'major' => $matches[1][0],
+ 'minor' => $matches[2][0],
+ 'build' => $matches[5][0],
+ 'stageFull' => $matches[6][0],
+ 'stageName' => $matches[7][0],
+ 'stageVersion' => $matches[8][0],
+ );
+
+ //Macros
+ $this->macros += array(
+ '_CCPP' => 1,
+ '_CCPP_MAJOR' => $ccppVersion['major'],
+ '_CCPP_MINOR' => $ccppVersion['minor'],
+ '_CCPP_VERSION' => $ccppVersion['major'].'.'.$ccppVersion['minor'],
+ '_CCPP_VERSION_FULL' => CCPP_VERSION,
+ '_CCPP_PHP' => 1,
+ '_CCPP_PHP_VERSION' => (float) PHP_VERSION,
+ );
+ //OS
+ $uOS = strtoupper(PHP_OS);
+ if (substr($uOS, 0, 3) === 'WIN')
+ $this->macros += array('WINDOWS' => 1);
+ elseif (substr($uOS, 0, 5) === 'LINUX')
+ $this->macros += array('LINUX' => 1, 'UNIX' => 1);
+ if (strpos($uOS, 'BSD') !== false)
+ $this->macros += array('UNIX' => 1);
+ }
+ // Options getters and setters
+ public function setOption($name, $value) {$this->options[$name] = $value;}
+ public function getOption($name = null)
+ {
+ return isset($name) ? $this->options[$name] : $this->options;
+ }
+ //Macro getters and setters
+ public function define($name, $value) {$this->macros[$name] = $value;}
+ public function isDefined($name) {return isset($this->macros[$name]);}
+ public function getMacro($name) {return $this->macros[$name];}
+
+ public function execute($filename, $offset = 0)
+ {
+ if ($this->options['execute.method'] == 'include') {
+ //tmpname()
+ fwrite(STDERR, '#error Not implemented execution method "include"');
+ exit (1);
+ }
+ else {
+ $evalCode = $this->parseFilename($filename, $offset);
+ $evalCode = '?>' . $evalCode . ((substr($evalCode, -2) == '?>')?'<?php ':'');
+ return eval($evalCode);
+ }
+ }
+ public function parseFilename($filename, $offset = 0)
+ {
+ //Fetch file contents
+ $code = file_get_contents($filename, false, null, $offset);
+ return $this->parse($code);
+ }
+
+ public function parse($code)
+ {
+
+ // ISO/IEC 9899:1990 (ANSI C99) like translation phases
+ // Trigraphs
+ if ($this->options['translate.trigraphs'])
+ $this->_translationPhase1($code);
+ // Line slicing
+ if ($this->options['translate.lineSlicing'])
+ $this->_translationPhase2($code);
+ // Tokenize & Comments
+ if ($this->options['translate.comments'])
+ $this->_translationPhase3($code);
+ // Directives compiler
+ if ($this->options['translate.directives'])
+ $code = $this->_translationPhase4($code);
+
+
+ //Interpretate/evaluate compiled code
+ if ($this->options['output.format'] == 'compiled')
+ ; //Do nothing
+ elseif ($this->options['output.format'] == 'evaluated')
+ $code = $this->_processor_evaluate($code);
+ else
+ die('Invalid output format: '.(isset($this->options['output.format']) ? $this->options['output.format'] : '<NOT SET>')."\n");
+
+ return $code;
+ }
+
+ /**
+ * In phase 1
+ * Trigraphs are translated.
+ *
+ * Reference: ISO/IEC 9899:1990 TC2 (5.1.1.2)
+ */
+ protected function _translationPhase1(&$rCode)
+ {
+
+ $trigraph_pairs = array(
+ '??=' => '#',
+ '??/' => '\\',
+ '??\'' => '^',
+ '??(' => '[',
+ '??)' => ']',
+ '??!' => '|',
+ '??<' => '{',
+ '??>' => '}',
+ '??-' => '~',
+ );
+ $rCode = strtr($rCode, $trigraph_pairs);
+ }
+
+ /**
+ * In phase 2
+ * The sequence <backslash> <new-line> are removed, so content,
+ * that is sliced using the sequence above on more than one line will be compacted in just one line
+ *
+ * Reference: ISO/IEC 9899:1990 TC2 (5.1.1.2)
+ */
+
+ protected function _translationPhase2(&$rCode)
+ {
+ $replace_pairs = array(
+ "\\\n" => '', // Unix-like
+ "\\\r\n" => '', // Windows, DOS, OS/2
+ "\\\r" => '', // Mac OS < 9, Apple ][
+ );
+ $rCode = strtr($rCode, $replace_pairs);
+ }
+
+ /**
+ * In phase 3
+ * Comments are replaced by a single <space> character (\x20)
+ *
+ * Reference: ISO/IEC 9899:1990 TC2 (5.1.1.2)
+ */
+
+ protected function _translationPhase3(&$rCode)
+ {
+ // Copyright headers protection
+ // TODO: more
+ //preg_replace_callback()
+
+ $patterns = array(
+// '~/\*(.*?(copyright).*?)\*/~is'
+ '~^\s*//.*$~m',
+ '~/\*.*?\*/~is',
+ );
+ $replacements = array(
+// '',
+ ' ',
+ ' ',
+ );
+ $rCode = preg_replace($patterns, $replacements, $rCode);
+ }
+
+ /**
+ * Phase 4 is for
+ * Preprocessor directives processing, macros replacement, #pragma, #include, hash hash replacement
+ *
+ * CCPP does phase 3 tokenization in phase 4 in order to keep tokens in this local variable scope
+ * Also CCPP compiles/translates the processed source and PreProcessor directives to PHP code
+ *
+ * Reference: ISO/IEC 9899:1990 TC2 (5.1.1.2)
+ *
+ * Argument $source is not a reference, to safe time copying the string once again
+ * Otherwise code will be like:
+ * $source = $code;
+ * // Initialize compilated code
+ * $code = '';
+ *
+ */
+
+ protected function _translationPhase4($source) //aka. CCPP - Compilation phase
+ {
+ //Initialize
+ $skipTokens = 0;
+
+ // Initialize compilated code
+ $code = '';
+
+ // Prepare PHP snippets
+ $sPOT = &$this->_cache_phpSnippets['POT'];
+ $sPCT = &$this->_cache_phpSnippets['PCT'];
+ $protectPOT = array('<?php' => '<?php echo \'<?php\'; ?>');
+ // Phase 3 Tokenization
+ // Get source file tokens
+ $sourceTokens = token_get_all($source);
+
+ // Phase 4
+ // Processing
+ foreach ($sourceTokens as $key => &$value) {
+ if ($skipTokens > 0) {$skipTokens--; continue;}
+ if (is_string($value)) //Operator
+ $code .= $value;
+ elseif ($value[0] == T_OPEN_TAG) // <?php tag
+ $code .= $sPOT.'echo \'<?php\';'.$sPCT."\n\n";
+ elseif ($value[0] == T_WHITESPACE && $this->options['translate.compactWhitespaces']) //Replaces whitespaces with single space character
+ $code .= ' ';
+ elseif ($value[0] == T_STRING) {
+ $code .= $this->_cache_phpSnippets['POT'].$this->_compiler_macro($sourceTokens, $key, $value, $skipTokens).$this->_cache_phpSnippets['PCT'];
+ }
+ elseif ($value[0] == T_COMMENT && //If tokenizer classified it as comment
+ $value[1][0] == '#' && //If has directive operator
+ ($key == 0 || //Is the first token
+ //OR is preceded by whitespace or comment/directive
+ (is_array($prevToken = $sourceTokens[$key-1]) &&
+ ($prevToken[0] == T_WHITESPACE || $prevToken[0] == T_COMMENT || $prevToken[0] == T_OPEN_TAG)
+ )
+ )
+ )
+ { // BEGIN Directives
+
+ //Parse directive with formated input
+ sscanf($value[1], $this->_cache_formats['directive'], $directive, $op);
+ $directive = strtolower($directive);
+ $op = rtrim($op);
+ // Two-operand directives
+ if ($directive == 'define'){
+ //list($op, $op2) = explode(' ', $op, 2); // Not optimized for FMACRO(a, b) (a / b)
+ preg_match_all($this->_cache_regexps['defineMacroOperands'], $op, $matches);
+ $op = $matches[1][0]; $op2 = $matches[4][0];
+ $code .= $sPOT.'$ccpp->_processor_define('.$this->_protector_singleQuoted($op).', '.$this->_protector_singleQuoted($op2).');'.$sPCT."\n";
+ unset($matches);
+ }
+ // One-operand directives
+ elseif ($directive == 'include')
+ $code .= $sPOT.'echo ($ccpp->_processor_include('. $this->_protector_singleQuoted($op).'));'.$sPCT."\n";
+ elseif ($directive == 'if')
+ $code .= $sPOT.'if ($ccpp->_processor_ifCondition('.$this->_protector_singleQuoted($op).')):'.$sPCT."\n";
+ elseif ($directive == 'elif')
+ $code .= $sPOT.'elseif ($ccpp->_processor_ifCondition('.$this->_protector_singleQuoted($op).')):'.$sPCT."\n";
+ elseif ($directive == 'ifdef')
+ $code .= $sPOT.'if ($ccpp->_processor_isDefined('.$this->_protector_singleQuoted($op).')):'.$sPCT."\n";
+ elseif ($directive == 'ifndef')
+ $code .= $sPOT.'if (!$ccpp->_processor_isDefined('.$this->_protector_singleQuoted($op).')):'.$sPCT."\n";
+ //Non-standard one-operand directives
+ elseif ($directive == 'includephp')
+ $code .= $sPOT.'echo ($ccpp->_processor_includeAsPhp('.$this->_protector_singleQuoted($op).'));'.$sPCT."\n";
+ elseif ($directive == 'literal')
+ $code .= $op."\n";
+ // Zero-operand directives
+ elseif ($directive == 'else')
+ $code .= $sPOT.'else:'.$sPCT."\n";
+ elseif ($directive == 'endif')
+ $code .= $sPOT.'endif;'.$sPCT."\n";
+ elseif ($directive == 'error')
+ $code .= $sPOT.'$this->_processor_error('.$this->_protector_singleQuoted($op).', '.$value[2].');'.$sPCT."\n";
+ elseif ($directive == 'warning')
+ $code .= $sPOT.'$this->_processor_warning('.$this->_protector_singleQuoted($op).', '.$value[2].');'.$sPCT."\n";
+
+ } // END Directives
+ else $code .= strtr($value[1], $protectPOT); //Unrecognized token is appended as is
+ }
+ return $code; // Return compiled code
+ }
+ /**
+ * Compiler functions
+ */
+ protected function _compiler_macro(&$sourceTokens, $key, &$value, &$rSkip)
+ {
+ if ( $this->options['translate.functionMacros'] && isset($sourceTokens[$key+1]) && $sourceTokens[$key+1] == '(') { //Function-like macro
+ return 'echo '.$this->_compiler_macroFunction($sourceTokens, $key, $rSkip) . ';';
+ }
+ else //Object-like macro
+ return 'echo $ccpp->_processor_macro('.$this->_protector_singleQuoted($value[1]).');';
+ }
+ protected function _compiler_macroFunction(&$sourceTokens, $key, &$rSkip)
+ {
+ $skip = 0;
+ $k = 0;
+ $args = array();
+ for ($i = $key+2; (isset($sourceTokens[$i]) && ($iToken = $sourceTokens[$i]) != ')'); $i++) {
+ if ($skip > 0) {--$skip; continue;}
+ if (is_string($iToken)) {
+ if ($iToken == ',') {
+ if (isset($args[$k])){
+ $args[$k] = $this->_protector_singleQuoted($args[$k]);
+ ++$k;
+ }
+ }
+ else {
+ $args[$k] .= $iToken;
+ }
+ }
+ elseif ($iToken[0] == T_STRING) {
+ /* Another Function-like macro... nested macros are still unsupported
+ if ($sourceTokens[$i + 1] == '('){
+ $args[$k++] = $this->_compiler_macroFunction($sourceTokens, $i, $skip);
+ }
+ else {
+ $args[$k++] = '$ccpp->_processor_macro('.$this->_protector_singleQuoted($iToken[1]).')';
+ $args[$k] .= $iToken[1];
+ }*/
+ $args[$k] .= $iToken[1];
+ }
+ elseif ($iToken[0] != T_WHITESPACE)
+ $args[$k] .= $iToken[1];
+ }
+ if (isset($sourceTokens[$i])) { //There is matching closing bracket
+ if(isset($args[$k])) $args[$k] = $this->_protector_singleQuoted($args[$k]);
+ $rSkip = $i - $key;
+ return '$ccpp->_processor_macroFunction('.$this->_protector_singleQuoted($sourceTokens[$key][1]).(count($args)?',':'').implode(', ',$args).')';
+ }
+ else {
+ //$this->debug('No matching braket');
+ fwrite(STDERR, '#CCPP error: no matching bracket near line '.$sourceTokens[$key][2].' of the code');
+ exit (2);
+ }
+ }
+
+ /**
+ * Evaluator/preprocessor functions
+ */
+
+ protected function _processor_evaluate($code)
+ {
+ $ccpp = &$this; //Reference to the compiler
+ ob_start();
+ //Prepare compilation header and footer
+ $code = "\n?>" . $code . "<?php\n";
+ eval($code);
+ return ob_get_clean();
+ }
+ protected function _processor_macro($token)
+ {
+ if (isset($this->macros[$token])) return $this->macros[$token];
+ else return $token;
+ }
+ protected function _processor_macroFunction()
+ {
+ $argv = func_get_args();
+ $name = array_shift($argv);
+ if (isset($this->functionMacros[$name])) return vsprintf($this->functionMacros[$name], $argv);
+ else return $name.'('.implode(', ', $argv).')';
+ }
+
+ protected function _processor_include($operand)
+ {
+ preg_match_all('~("|<)(.*)\\1~', $operand, $matches);
+ if ($matches) {
+ $filename = $matches[2][0];
+ $parsedInclusion = substr($this->parseFilename($filename), $startOffset);
+ return $parsedInclusion;
+ }
+ else return '';
+ }
+ protected function _processor_includeAsPhp($operand)
+ {
+ preg_match_all('~("|<)(.*)\\1(\s+([0-9]+))?(\s+([0-9]+))?~', $operand, $matches);
+ if (count($matches[0])) {
+ $filename = $matches[2][0];
+ $startOffset = (int) $matches[4][0];
+ if ($maxLength = (int) $matches[6][0])
+ $code = file_get_contents($filename, $startOffset, $maxLength);
+ else
+ $code = file_get_contents($filename, $startOffset);
+
+ if ($code) {
+ $sPOT = "<?php\n";
+ $code = $sPOT.$code;
+ return substr($this->parse($code), strlen($sPOT));
+ }
+ }
+ return '';
+ }
+ protected function _processor_ifCondition($operand)
+ {
+ $operand = strtr($operand, $this->macros);
+ return eval('return ('.$operand.');');
+ }
+ protected function _processor_define($macroName, $macroCode)
+ {
+ if (($pos = strpos($macroName, '(')) !== false) { //Function-like macro
+ $name = substr($macroName, 0, $pos);
+ $argsString = substr($macroName, $pos + 1, -1);
+ $args = split('\s*,\s*', $argsString);
+ $macroCode = strtr($macroCode, array('%' => '%%'));
+ $replace_pairs = array_flip($args);
+ $i = 0;
+ foreach ($replace_pairs as &$value) $value = '%'.(++$i).'$s';
+ $macroCode = strtr($macroCode, $replace_pairs);
+ $macroCode = strtr($macroCode, $this->macros);
+ $macroCode = preg_replace('~[ ]*(?<!#)(##)(?!#)[ ]*~', '' , $macroCode); // HASH HASH replacement
+ $this->functionMacros[$name] = $macroCode;
+ }
+ else {
+ $macroCode = strtr($macroCode, $this->macros);
+ $macroCode = preg_replace('~[ ]*(?<!#)(##)(?!#)[ ]*~', '' , $macroCode); // HASH HASH replacement
+ $this->macros[$macroName] = $macroCode;
+ }
+ }
+ protected function _processor_isDefined($macroName)
+ {
+ return isset($this->macros[$macroName]);
+ }
+ protected function _processor_error($message, $line = __LINE__)
+ {
+ fwrite(STDERR, '#error: "'.$message.'" on line '.$line." in the source\n");
+ $this->_processor_exitClean(1);
+ }
+ protected function _processor_warning($message, $line = __LINE__)
+ {
+ fwrite(STDERR, '#warning: "'.$message.'" on line '.$line." in the source\n");
+ }
+ protected function _processor_exitClean($status = 0)
+ {
+ while(ob_get_level()) ob_end_clean();
+ exit ($status);
+ }
+ protected function _processor_clearBuffers()
+ {
+ while(ob_get_level()) ob_end_clean();
+ }
+ protected function _protector_quote($string) // Not utilized
+ {
+ return '"'.addcslashes($string).'"'; // C style
+ }
+ protected function _protector_escapeSingleQuote($string)
+ {
+ return addcslashes($string, '\'');
+ }
+ protected function _protector_singleQuoted($string)
+ {
+ return '\''.$this->_protector_escapeSingleQuote($string).'\'';
+ }
+} \ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e0faa18
--- /dev/null
+++ b/README.md
@@ -0,0 +1,15 @@
+# C Compatible Preprocessor for PHP
+
+## Features
+* trigraphs replacement
+* line-slicing
+* comment removal
+* directives: #if, #else, #elif, #endif, #include, #define, #warning, #error
+* non-standard directives: #includephp, #literal
+* Object-like macros
+* Function-like macros (Function-like macros in Function-like macros are not supported)
+* ## preprocessor operator
+
+## License
+The license is 2-clause BSD.
+That means it's open-source and you are free to distribute, modify, sell or use in a commercial software and many others, as long as you keep the copyright. \ No newline at end of file