Skip to content

Commit

Permalink
Merge pull request #237 from deliciousbrains/use-preg-split-for-token…
Browse files Browse the repository at this point in the history
…izing

Use regex to split the SQL statement into tokens.
  • Loading branch information
greenlion committed May 16, 2017
2 parents db57e3a + 02a5528 commit 06e0611
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 57 deletions.
79 changes: 55 additions & 24 deletions src/PHPSQLParser/lexer/LexerSplitter.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,41 +56,72 @@ class LexerSplitter {
protected static $splitters = array("<=>", "\r\n", "!=", ">=", "<=", "<>", "<<", ">>", ":=", "\\", "&&", "||", ":=",
"/*", "*/", "--", ">", "<", "|", "=", "^", "(", ")", "\t", "\n", "'", "\"", "`",
",", "@", " ", "+", "-", "*", "/", ";");
protected $tokenSize;
protected $hashSet;

/**
* @var string Regex string pattern of splitters.
*/
protected $splitterPattern;

/**
 * Constructor.
 *
 * Derives the lookup fields from the static splitter list: the length of
 * its first entry, a flipped copy for key-based lookups, and the regex
 * pattern built from all entries.
 */
public function __construct() {
    $splitters = self::$splitters;

    // The first entry is expected to be the longest splitter.
    $this->tokenSize = strlen($splitters[0]);

    // Flipping turns the splitter strings into array keys.
    $this->hashSet = array_flip($splitters);

    $this->splitterPattern = $this->convertSplittersToRegexPattern($splitters);
}

/**
* Get the maximum length of a split token.
*
* The longest element must be at position 0 of the internal $splitters array,
* so the function returns the length of that token. It must be > 0.
*
* @return int The number of characters for the largest split token.
*/
public function getMaxLengthOfSplitter() {
return $this->tokenSize;
/**
 * Return the regex pattern string stored in the splitterPattern field.
 *
 * @return string
 */
public function getSplittersRegexPattern() {
    return $this->splitterPattern;
}

/**
* Looks up the given token in the internal split token array.
* It returns true if the token is found, false otherwise.
*
* @param String $token a string, which could be a split token.
*
* @return boolean true, if the given string will be a split token, false otherwise
*/
public function isSplitter($token) {
return isset($this->hashSet[$token]);
/**
 * Convert an array of splitter tokens to a regex pattern string.
 *
 * Each splitter becomes one alternative of a single capturing group, and the
 * group is wrapped in "/" delimiters. Alternatives keep the order of
 * $splitters; since a regex alternation takes the first alternative that
 * matches, a splitter that begins with another splitter (such as ">=" and
 * ">") has to be listed before the shorter one.
 *
 * @param array $splitters The splitter tokens as plain, unescaped strings.
 *
 * @return string The delimited pattern, e.g. '/(\>\=|\>)/' for (">=", ">").
 */
public function convertSplittersToRegexPattern( $splitters ) {
    // Whitespace splitters are written as regex escapes instead of raw
    // characters. Note that the single space maps to \s, which matches any
    // whitespace character rather than only " ".
    $whitespace = array(
        "\r\n" => '\r\n',
        "\t"   => '\t',
        "\n"   => '\n',
        " "    => '\s',
    );

    $regex_parts = array();
    foreach ( $splitters as $part ) {
        if ( isset( $whitespace[ $part ] ) ) {
            $regex_parts[] = $whitespace[ $part ];
            continue;
        }

        // Passing the delimiter makes preg_quote() escape "/" as well, so any
        // splitter containing a slash stays valid inside the /.../ pattern.
        $regex_parts[] = preg_quote( $part, '/' );
    }

    return '/(' . implode( '|', $regex_parts ) . ')/';
}
}

Expand Down
34 changes: 1 addition & 33 deletions src/PHPSQLParser/lexer/PHPSQLLexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -85,39 +85,7 @@ public function split($sql) {
throw new InvalidParameterException($sql);
}

$tokens = array();
$token = "";

$splitLen = $this->splitters->getMaxLengthOfSplitter();
$found = false;
$len = strlen($sql);
$pos = 0;

while ($pos < $len) {

for ($i = $splitLen; $i > 0; $i--) {
$substr = substr($sql, $pos, $i);
if ($this->splitters->isSplitter($substr)) {

if ($token !== "") {
$tokens[] = $token;
}

$tokens[] = $substr;
$pos += $i;
$token = "";

continue 2;
}
}

$token .= $sql[$pos];
$pos++;
}

if ($token !== "") {
$tokens[] = $token;
}
$tokens = preg_split($this->splitters->getSplittersRegexPattern(), $sql, null, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

$tokens = $this->concatEscapeSequences($tokens);
$tokens = $this->balanceBackticks($tokens);
Expand Down

0 comments on commit 06e0611

Please sign in to comment.