From bee8320e49cc886e9227ccb421c1056a58c5be77 Mon Sep 17 00:00:00 2001 From: JLacoude Date: Sun, 22 Jan 2017 18:47:28 +0100 Subject: [PATCH 1/3] Adding test using a multibyte character in postgres identifier. --- tests/Parser/PgsqlParserTest.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/Parser/PgsqlParserTest.php b/tests/Parser/PgsqlParserTest.php index 75e93908..638d4417 100644 --- a/tests/Parser/PgsqlParserTest.php +++ b/tests/Parser/PgsqlParserTest.php @@ -208,6 +208,10 @@ public function testDollarQuotedStrings() $sql = 'SELECT $outer$ nested strings $inner$:foo$inner$ $outer$'; $parsedQuery = $this->parseSingleQuery($sql, $parameters); $this->assertEquals($sql, $parsedQuery->getString()); + + $sql = 'SELECT $€$hello$€$'; + $parsedQuery = $this->parseSingleQuery($sql, $parameters); + $this->assertEquals($sql, $parsedQuery->getString()); } public function testTypeCasting() From f19b37f0c467dd2cf4103f214c3ce8aa33bf162b Mon Sep 17 00:00:00 2001 From: JLacoude Date: Sun, 22 Jan 2017 19:14:31 +0100 Subject: [PATCH 2/3] Removed the call to State->capture from State->getIdentifier --- src/Parser/State.php | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/Parser/State.php b/src/Parser/State.php index 41304607..b7f957a8 100644 --- a/src/Parser/State.php +++ b/src/Parser/State.php @@ -69,6 +69,8 @@ class State */ protected $new_statement_character_found = false; + protected $valid_placeholder_characters = []; + /** * * Constructor @@ -90,6 +92,12 @@ public function __construct($statement, $values = array(), $charset = 'UTF-8') if (array_key_exists(0, $this->values)) { array_unshift($this->values, null); } + $this->valid_placeholder_characters = array_merge( + range('a', 'z'), + range ('A', 'Z'), + range (0, 9), + ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_'] + ); } /** @@ -314,7 +322,20 @@ public function getCharset() */ public function getIdentifier() { - return $this->capture('\\w+\\b'); + $identifier = ''; + $length = 0; + while (! $this->done()) + { + $character = mb_substr($this->statement, $this->current_index + $length, 1, $this->charset); + if (! in_array($character, $this->valid_placeholder_characters, true)) + { + return $identifier; + } + $identifier .= $character; + $length++; + + } + return $identifier; } /** From 85c0df119c123ce201261e132d7fd1ebbe1a6d25 Mon Sep 17 00:00:00 2001 From: JLacoude Date: Sun, 22 Jan 2017 19:56:39 +0100 Subject: [PATCH 3/3] Removed the State->capture method --- src/Parser/AbstractParser.php | 11 +++++++-- src/Parser/PgsqlParser.php | 28 ++++++++++++++++++++--- src/Parser/State.php | 39 +++++++++----------------------- tests/Parser/PgsqlParserTest.php | 8 +++++-- 4 files changed, 51 insertions(+), 35 deletions(-) diff --git a/src/Parser/AbstractParser.php b/src/Parser/AbstractParser.php index edec2567..f10196ea 100644 --- a/src/Parser/AbstractParser.php +++ b/src/Parser/AbstractParser.php @@ -218,8 +218,15 @@ protected function handleNumberedParameter($state) */ protected function handleSemiColon($state) { - $uselessCharacters = $state->capture(';\\s*'); - $state->passString($uselessCharacters); + while (! $state->done()) + { + $character = $state->getCurrentCharacter(); + if (! in_array($character, array(';', "\r", "\n", "\t", " "), true)) + { + break; + } + $state->passString($character); + } $state->setNewStatementCharacterFound(true); } diff --git a/src/Parser/PgsqlParser.php b/src/Parser/PgsqlParser.php index be584555..45d69f0e 100644 --- a/src/Parser/PgsqlParser.php +++ b/src/Parser/PgsqlParser.php @@ -90,10 +90,22 @@ protected function handlePossibleCStyleString($state) } if (! $inCString) { // Checking if we have blank characters until next quote. In which case it is the same string - $blanks = $state->capture("\\s*'"); + $offset = 1; + $blanks = true; + while (! $state->done()) { + $characterAtOffset = $state->getCharacterFromCurrent($offset); + if ($characterAtOffset === "'") { + break; + } + if (! in_array($characterAtOffset, array(" ", "\n", "\r", "\t"), true)) { + $blanks = false; + break; + } + $offset ++; + } if ($blanks) { - $state->copyUntilCharacter("'"); $state->copyCurrentCharacter(); + $state->copyUntilCharacter("'"); $inCString = true; } } @@ -110,7 +122,17 @@ protected function handlePossibleCStyleString($state) */ protected function handleDollar($state) { - $identifier = $state->capture('\\$([a-zA-Z_]\\w*)*\\$'); + $identifier = '$'; + $offset = 1; + while (! $state->done()) { + $character = $state->getCharacterFromCurrent($offset); + $identifier .= $character; + if ($character === '$') { + break; + } + $offset ++; + } + if ($identifier) { // Copy until the end of the starting tag $state->copyUntilCharacter($identifier); diff --git a/src/Parser/State.php b/src/Parser/State.php index b7f957a8..64f60ff6 100644 --- a/src/Parser/State.php +++ b/src/Parser/State.php @@ -96,7 +96,7 @@ public function __construct($statement, $values = array(), $charset = 'UTF-8') range('a', 'z'), range ('A', 'Z'), range (0, 9), - ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_'] + array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_') ); } @@ -162,6 +162,16 @@ public function getCurrentCharacter() return mb_substr($this->statement, $this->current_index, 1, $this->charset); } + /** + * Returns the character $n position after the current character + * @param $n + * @return string + */ + public function getCharacterFromCurrent($n) + { + return mb_substr($this->statement, $this->current_index + $n, 1, $this->charset); + } + /** * * Returns the modified SQL query @@ -338,33 +348,6 @@ public function getIdentifier() return $identifier; } - /** - * - * Tries to matche a regular expression starting at current index and returns the result - * - * @param string $regexp - * @param int $capture_group - * @return string - */ - public function capture($regexp, $capture_group = 0) - { - $capture = ''; - if ($this->last_index <= $this->current_index) { - return $capture; - } - $regexp = "/\G{$regexp}/u"; - if (preg_match_all( - $regexp, - $this->statement, - $matches, - PREG_SET_ORDER, - $this->current_index - )) { - $capture = isset($matches[$capture_group][0]) ? $matches[$capture_group][0] : ''; - } - return $capture; - } - /** * * Stores a value in the final array of values to bind. If the name is already in use, generate a new name diff --git a/tests/Parser/PgsqlParserTest.php b/tests/Parser/PgsqlParserTest.php index 638d4417..41da8908 100644 --- a/tests/Parser/PgsqlParserTest.php +++ b/tests/Parser/PgsqlParserTest.php @@ -176,8 +176,8 @@ public function testCStyleStringConstants() $this->assertEquals($sql, $parsedQuery->getString()); $sql = <<parseSingleQuery($sql, $parameters); $this->assertEquals($sql, $parsedQuery->getString()); @@ -212,6 +212,10 @@ public function testDollarQuotedStrings() $sql = 'SELECT $€$hello$€$'; $parsedQuery = $this->parseSingleQuery($sql, $parameters); $this->assertEquals($sql, $parsedQuery->getString()); + + $sql = 'SELECT $€$hello$€'; + $parsedQuery = $this->parseSingleQuery($sql, $parameters); + $this->assertEquals($sql, $parsedQuery->getString()); } public function testTypeCasting()