diff --git a/libraries/sqlparser.lib.php3 b/libraries/sqlparser.lib.php3 index 79346510c..ec53136e1 100644 --- a/libraries/sqlparser.lib.php3 +++ b/libraries/sqlparser.lib.php3 @@ -9,23 +9,643 @@ * These functions define an SQL parser system, capable of understanding and * extracting data from a MySQL type SQL query. * + * The basic procedure for using the new SQL parser: + * On any page that needs to extract data from a query or to pretty-print a + * query, you need code like this up at the top: * + * ($sql contains the query) + * $parsedSQL = PMA_SQP_Parse($sql); * + * If you then want to extract data from it, you just need to run + * $SQLinfo = PMA_SQP_Analyze($parsedSQL); + * (returned structure of this function is being rewritten presently); * - * - * - * - * - * - * - * - * - * - * + * If you want a pretty-printed version of the query, do: + * $string = PMA_SQP_FormatHTML($parsedSQL); + * (note that you need to have syntax.css.php3 included somehow in your + * page for it to work, I recommend '' at the moment.) 
* */ if (!defined('PMA_SQP_LIB_INCLUDED')) { define('PMA_SQP_LIB_INCLUDED', 1); + if(!defined('DEBUGTIMING')) { + function PMA_SQP_ArrayAdd(&$arr,$type,$data, &$arrsize) + { + $arr[] = array( 'type' => $type, 'data' => $data ); + $arrsize++; + } + } else { + function PMA_SQP_ArrayAdd(&$arr,$type,$data, &$arrsize) + { + global $timer; + $t = $timer; + $arr[] = array( 'type' => $type, 'data' => $data , 'time' => $t ); + $timer = microtime(); + $arrsize++; + } + } + + function PMA_SQP_Parse($sql) + { + $len = strlen($sql); + if($len == 0) { + return array(); + } + $sql_array = array(); + $sql_array['raw'] = $sql; + $count1 = 0; + $count2 = 0; + $punct_queryend = ';'; + $punct_qualifier = '.'; + $punct_listsep = ','; + $punct_level_plus = '('; + $punct_level_minus = ')'; + $digit_floatdecimal = '.'; + $digit_hexset = 'x'; + $bracket_list = '()[]{}'; + $allpunct_list = '-,;:!?/.^~\*&%+<=>|'; + $allpunct_list_pair = array ( + 0 => '!=', + 1 => '&&', + 2 => ':=', + 3 => '<<', + 4 => '<=', + 5 => '<=>', + 6 => '<>', + 7 => '>=', + 8 => '>>', + 9 => '||', + ); + + $allpunct_list_pair_size = 10; //count($allpunct_list_pair); + $quote_list = "\'\"\`"; + $arraysize = 0; + while($count2 < $len) { + $c = $sql[$count2]; + $count1 = $count2; + + if( ($c == "\n") ) { + $count2++; + PMA_SQP_ArrayAdd( $sql_array, 'white_newline', '', $arraysize); + continue; + } + + //check for white space + if(PMA_STR_IsSpace($c)) { + $count2++; + continue; + } + + // check for comment lines. 
+ // MySQL style # + // C style /* */ + // ANSI style -- + if( ($c == '#') || (($count2+1 < $len) && ($c == '/') && ($sql[$count2+1] == '*')) || (($c == '-') && ($count2+2 < $len) && ($sql[$count2+1] == '-') && ($sql[$count2+2] == ' '))) { + $count2++; + $pos = 0; + $type = 'bad'; + switch($c) { + case '#': + $type = 'mysql'; + case '-': + $type = 'ansi'; + $pos = strpos($sql,"\n",$count2); + break; + case '/': + $type = 'c'; + $pos = strpos($sql,"*/",$count2); + $pos += 2; + break; + default: + break; + } + $count2 = ($pos < $count2) ? $len : $pos; + $str = substr($sql,$count1,$count2-$count1); + PMA_SQP_ArrayAdd ( $sql_array, 'comment_'.$type, $str, $arraysize); + continue; + } + + //check for something inside quotation marks + if(PMA_STR_StrInStr($c,$quote_list)) { + $startquotepos = $count2; + $quotetype = $c; + $count2++; + $escaped = FALSE; + $escaped_escaped = FALSE; + $pos = $count2; + $oldpos = 0; + do { + $oldpos = $pos; + $pos = strpos($sql,$quotetype,$oldpos); + // ($pos === FALSE) + if(!is_integer($pos)) { + trigger_error('Syntax: Unclosed quote ('.$quotetype.') at '.$startquotepos); + return; + } + + //if the quote is the first character, + //it can't be escaped, so don't do the rest of the code + if($pos == 0) { + break; + } + + if(PMA_STR_CharIsEscaped($sql,$pos)) { + $pos ++; + continue; + } else { + break; + } + } while ( $len > $pos ); + + $count2 = $pos; + $count2++; + $type = 'quote_'; + switch($quotetype) { + case "'": + $type .= 'single'; + break; + case "\"": + $type .= 'double'; + break; + case "`": + $type .= 'backtick'; + break; + default: + break; + } + $data = substr($sql, $count1, $count2-$count1); + PMA_SQP_ArrayAdd ( $sql_array, $type, $data, $arraysize ); + continue; + } + //check for brackets + if(PMA_STR_StrInStr($c,$bracket_list)) { + //all bracket tokens are only one item long + $count2++; + $type_type = ''; + if(PMA_STR_StrInStr($c,'([{')) { + $type_type = 'open'; + } else { + $type_type = 'close'; + } + $type_style = ''; + 
if(PMA_STR_StrInStr($c,'()')) { + $type_style = 'round'; + } elseif(PMA_STR_StrInStr($c,'[]')) { + $type_style = 'square'; + } else { + $type_style = 'curly'; + } + + $type = 'punct_bracket_'.$type_type.'_'.$type_style; + PMA_SQP_ArrayAdd ( $sql_array, $type, $c, $arraysize); + continue; + } + //check for punct + if(PMA_STR_StrInStr($c,$allpunct_list)) + { + while( ($count2 < $len) && PMA_STR_StrInStr($sql[$count2],$allpunct_list) ) { + $count2++; + } + $l = $count2-$count1; + if($l == 1) { + $punct_data = $c; + } else { + $punct_data = substr($sql,$count1,$l); + } + + //special case, sometimes, althought two characters are adjectent directly, + //they ACTUALLY need to be seperate + if( $l == 1 ) { + $t_suffix = ''; + switch($punct_data) { + case $punct_queryend: + $t_suffix = '_queryend'; + break; + case $punct_qualifier: + $t_suffix = '_qualifier'; + break; + case $punct_listsep: + $t_suffix = '_listsep'; + break; + default: + break; + } + PMA_SQP_ArrayAdd ( $sql_array, 'punct'.$t_suffix, $punct_data, $arraysize); + } elseif( PMA_STR_BinarySearchInArr($punct_data,$allpunct_list_pair,$allpunct_list_pair_size)) { + //Ok, we have one of the valid combined punct expressions + PMA_SQP_ArrayAdd ( $sql_array, 'punct', $punct_data, $arraysize ); + } else { + //bad luck, lets split it up more + $first = $punct_data[0]; + $first2 = $punct_data[0].$punct_data[1]; + $last2 = $punct_data[$l-2].$punct_data[$l-1]; + $last = $punct_data[$l-1]; + if(($first == ',') || ($first == ';') || ($first == '.') || $first = '*') { + $count2 = $count1 + 1; + $punct_data = $first; + } elseif(($last2 == '/*') || ($last2 == '--')) { + $count2-=2; + $punct_data = substr($sql,$count1,$count2-$count1); + } elseif(($last == '-') || ($last == '+') || ($last == '!')) { + $count2--; + $punct_data = substr($sql,$count1,$count2-$count1); + } else { + trigger_error('Syntax: Unknown punctation string ('.$punct_data.') at '.$count1); + return; + } + PMA_SQP_ArrayAdd ( $sql_array, 'punct', $punct_data, 
$arraysize); + continue; + } + continue; + } + //check for alpha + if( FALSE && PMA_STR_IsSqlIdentifier($c,FALSE) || ($c == '@')) { + $count2 ++; + $is_SQLvariable = ($c == '@'); + $is_Digit = (!$is_SQLvariable) && PMA_STR_IsDigit($c); + $is_HexDigit = ($is_Digit) && ($c == '0') && ($sql[$count2] == 'x'); + $is_FloatDigit = FALSE; + $is_FloatDigitExponent = FALSE; + + if($is_HexDigit) { + $count2++; + } + + + while(($count2 < $len) && PMA_STR_IsSqlIdentifier($sql[$count2],$is_SQLvariable || $is_Digit)) { + $c2 = $sql[$count2]; + if($is_SQLvariable && ($c2 == '.')) { + $count2++; + continue; + } + if($is_Digit && (!$is_HexDigit) && ($c2 == '.')) { + $count2++; + if(!$is_FloatDigit) { + $is_FloatDigit = TRUE; + continue; + } else { + trigger_error('Syntax: Invalid Identifer ('.substr($sql,$count1,$count2-$count1).') at '.$count1); + return; + } + } + if($is_Digit && (!$is_HexDigit) && (($c2 == 'e') || ($c2 == 'E'))) { + if(!$is_FloatDigitExponent) { + $is_FloatDigitExponent = TRUE; + $is_FloatDigit = TRUE; + $count2++; + continue; + } else { + $is_Digit = FALSE; + $is_FloatDigit = FALSE; + } + } + if( ($is_HexDigit && PMA_STR_IsHexDigit($c2)) || ($is_Digit && PMA_STR_IsDigit($c2))) { + $count2++; + continue; + } else { + $is_Digit = FALSE; + $is_HexDigit = FALSE; + } + + $count2++; + } + + + $l = $count2-$count1; + $str = substr($sql,$count1,$l); + + $type = ''; + if($is_Digit) { + $type = 'digit'; + if($is_FloatDigit) { + $type .= '_float'; + } elseif($is_HexDigit) { + $type .= '_hex'; + } else { + $type .= '_integer'; + } + } else { + if($is_SQLvariable != FALSE) { + $type = 'alpha_variable'; + } else { + $type = 'alpha'; + } + } + PMA_SQP_ArrayAdd ( $sql_array, $type, $str, $arraysize ); + + continue; + } + + //DEBUG + $count2++; + echo 'You seem to have found a bug in the SQL parser.
Please submit a bug report with the data chunk below:
--BEGIN CUT--
'; + $debugstr = '$Id$
'; + $debugstr .= 'Why did we get here? '.$count1.' '.$count2.' '.$len.'
'."\n"; + $debugstr .= 'Leftover: '.substr($sql,$count1,$count2-$count1).'
'."\n"; + $debugstr .= 'A: '.$count1.' '.$count2.'
'."\n"; + $debugstr .= 'SQL: '.$sql; + $encodedstr = nl2br(chunk_split(base64_encode(gzcompress($debugstr,9)))); + echo $encodedstr; + echo '---END CUT---
'; + //$decodedstr = str_replace('
','', base64_decode(gzuncompress($encodedstr))); + $decodedstr = gzuncompress(base64_decode(str_replace('
','',$encodedstr))); + echo $decodedstr; + flush(); + ob_flush(); + die(); + + } + + global $syntax_columnAttrib, $syntax_reservedWord, $syntax_columnType, $syntax_functionName; + $len_columnAttrib = count($syntax_columnAttrib); + $len_reservedWord = count($syntax_reservedWord); + $len_columnType = count($syntax_columnType); + $len_functionName = count($syntax_functionName); + + if($arraysize > 0) { + $t_next = $sql_array[0]['type']; + $t_prev = NULL; + } + + for($i = 0; $i < $arraysize; $i++) { + $t_prev = $t_cur; + $t_cur = $t_next; + if(($i+1)<$arraysize) { + $t_next = $sql_array[$i+1]['type']; + } else { + $t_next = NULL; + } + if($t_cur == 'alpha') { + $t_suffix = '_identifier'; + $d_cur_upper = strtoupper($sql_array[$i]['data']); + if( ($t_next == 'punct_qualifier') || ($t_prev == 'punct_qualifier')) { + $t_suffix = '_identifier'; + } elseif( ($t_next == 'punct_bracket_open_round') && PMA_STR_BinarySearchInArr($d_cur_upper,$syntax_functionName,$len_functionName)) { + $t_suffix = '_functionName'; + } elseif(PMA_STR_BinarySearchInArr($d_cur_upper,$syntax_reservedWord,$len_reservedWord)) { + $t_suffix = '_reservedWord'; + } elseif(PMA_STR_BinarySearchInArr($d_cur_upper,$syntax_columnType,$len_columnType)) { + $t_suffix = '_columnType'; + } elseif(PMA_STR_BinarySearchInArr($d_cur_upper,$syntax_columnAttrib,$len_columnAttrib)) { + $t_suffix = '_columnAttrib'; + } else { + // Do nothing + } + $sql_array[$i]['type'] .= $t_suffix; + } + } + + // Store the size of the array inside the array, as count() is a slow operation. 
+ $sql_array['len'] = $arraysize; + + // Send the data back + return $sql_array; + + } + + + function PMA_SQP_Analyze($arr) { + $result = array(); + $size = $arr['len']; + $subresult = array( + 'querytype' => '', + 'list_db' => array(), + 'list_tbl' => array(), + 'list_tbl_alias' => array(), + 'list_col' => array(), + 'list_col_alias' => array(), + ); + $subresult_empty = $subresult; + $seek_queryend = FALSE; + + $supportedQueryTypes = array( + 'SELECT', + 'UPDATE', + 'DELETE', + 'INSERT', + 'REPLACE', + 'TRUNCATE' + /* + // Support for these additional query types will come later on. + // They are not needed yet + 'EXPLAIN', + 'DESCRIBE', + 'SHOW', + 'CREATE', + 'SET', + 'ALTER' + */ + ); + $supportedQueryTypes_size = count($supportedQueryTypes); + + for($i=0;$i <= $size; $i++) { + // High speed seek for locating the end of the current query + if($seek_queryend == TRUE) { + if($arr[$i]['type'] == 'punct_queryend') { + $seek_queryend = FALSE; + } else { + continue; + } + } + + switch($arr[$i]['type']) { + case 'punct_queryend': + $result[] = $subresult; + $subresult = $subresult_empty; + break; + case 'alpha_reservedWord': + // We don't know what type of query yet, so run this + if($subresult['querytype'] == '') { + $subresult['querytype'] = strtoupper($arr[$i]['data']); + } + // Check if we support this type of query + if(! PMA_STR_BinarySearchInArr($subresult['querytype'],$supportedQueryTypes,$supportedQueryTypes_size)) { + // Skip ahead to the next one if we don't + $seek_queryend = TRUE; + } + break; + default: + break; + } + + switch($subresult['querytype']) { + case 'SELECT': + break; + default: + break; + } + + } + + // They are are naughty and didn't have a trailing semi-colon, then still handle it properly + if($subresult['querytype'] != '') { + $result[] = $subresult; + } + + echo '
';
+        print_r($result);
+        echo '
'; + } + + function PMA_SQP_FormatHTML_colorize($arr) { + $i = strpos($arr['type'],'_'); + $class = ''; + if($i > 0) { + $class = 'syntax_'.substr($arr['type'],0,$i).' '; + } + + $class .= 'syntax_'.$arr['type']; + return ''.htmlspecialchars($arr['data']).''; + } + + function PMA_SQP_FormatHTML($arr) { + $str = ''; + $indent = 0; + $bracketlevel = 0; + $functionlevel = 0; + $infunction = FALSE; + $space_punct_listsep = ' '; + $space_punct_listsep_functionName = ' '; + $space_alpha_reservedWord = '
'."\n"; + $keywordsWithBrackets = array( + 'INDEX', + 'INTO', + 'KEY', + 'PRIMARY', + 'REFERENCES', + 'UNIQUE' + ); + $keywordsWithBrackets_size = count($keywordsWithBrackets); + $arraysize = $arr['len']; + $typearr = array(); + if($arraysize >= 0) { + /* array_push($typearr,NULL); + array_push($typearr,NULL); + array_push($typearr,NULL); + array_push($typearr,$arr[0]['type']); + array_push($typearr,$arr[1]['type']); */ + + $typearr[0] = NULL; + $typearr[1] = NULL; + $typearr[2] = NULL; + $typearr[3] = $arr[0]['type']; + } + + for($i = 0; $i < $arraysize; $i++) { + $before = ''; + $after = ''; + $indent = 0; + // array_shift($typearr); + /* + 0 prev2 + 1 prev + 2 current + 3 next + */ + if(($i+1)<$arraysize) { + //array_push($typearr,$arr[$i+1]['type']); + $typearr[4] = $arr[$i+1]['type']; + } else { + //array_push($typearr,NULL); + $typearr[4] = NULL; + } + + for($j=0;$j<4;$j++) { + $typearr[$j] = $typearr[$j+1]; + } + + switch($typearr[2]) { + case 'white_newline': + $after = '
'; + $before = ''; + break; + case 'punct_bracket_open_round': + $bracketlevel++; + $infunction = FALSE; + //make sure this array is sorted! + if( ($typearr[1] == 'alpha_functionName') || ($typearr[1] == 'alpha_columnType') || ($typearr[1] == 'punct') || ($typearr[3] == 'digit_integer') || ($typearr[3] == 'digit_hex') || ($typearr[3] == 'digit_float') || ( ( $typearr[0] == 'alpha_reservedWord' ) && ( PMA_STR_BinarySearchInArr(strtoupper($arr[$i-2]['data']),$keywordsWithBrackets,$keywordsWithBrackets_size))) ) { + $functionlevel++; + $infunction = TRUE; + $after .= ' '; + } else { + $indent++; + $after .= '
'."\n"; + } + break; + case 'punct_qualifier': + break; + case 'punct_listsep': + if($infunction == TRUE) { + $after .= $space_punct_listsep_functionName; + } else { + $after .= $space_punct_listsep; + } + break; + case 'punct_queryend': + if(($typearr[3] != 'white_newline') && ($typearr[3] != 'comment_mysql')&& ($typearr[3] != 'comment_ansi') ) { + $after .= '
'."\n"; + } + break; + case 'comment': + break; + case 'punct_bracket_close_round': + $bracketlevel--; + if($infunction == TRUE) { + $functionlevel--; + $after .= ' '; + } else { + $indent--; + $before .= '
'; + } + $infunction = ($functionlevel > 0) ? TRUE : FALSE; + break; + + case 'alpha_reservedWord': + if( ($typearr[1] != 'alpha_reservedWord') && ($typearr[1] != 'punct_level_plus') && ($typearr[1] != 'white_newline')) { + $before .= $space_alpha_reservedWord; + } + + switch(strtoupper($arr[$i]['data'])) { + case 'CREATE': + $space_punct_listsep = '
'."\n"; + $space_alpha_reservedWord = ' '; + break; + case 'UPDATE': + $space_punct_listsep = '
'."\n"; + $space_alpha_reservedWord = ' '; + break; + case 'INSERT': + $space_punct_listsep = '
'."\n"; + $space_alpha_reservedWord = '
'."\n"; + break; + case 'VALUES': + $space_punct_listsep = ' '; + $space_alpha_reservedWord = '
'."\n"; + break; + case 'SELECT': + $space_punct_listsep = ' '; + $space_alpha_reservedWord = '
'."\n"; + break; + default: + break; + } + + $after .= " "; + break; + + default: + break; + } + + if($typearr[3] != 'punct_qualifier') { + $after .= ' '; + } + $str .= $before.PMA_SQP_FormatHTML_colorize($arr[$i]).$after; + } + return $str; + } + } // $__PMA_SQP_LIB__