From 02407e086df00d06ee8f9972954f412af16f11f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Mon, 19 Sep 2016 19:36:00 -0400 Subject: [PATCH 01/16] make `rtoken()->expected()` compatible with future heuristics --- src/Token.php | 2 -- src/parsers.php | 5 +++-- tests/ParserTest.php | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Token.php b/src/Token.php index 840e25f..d59fa63 100644 --- a/src/Token.php +++ b/src/Token.php @@ -10,7 +10,6 @@ class Token implements \JsonSerializable { const ANY = 1021, NONE = 1032, - MATCH = 1043, OPERATOR = 1054, CLOAKED = 1065 ; @@ -21,7 +20,6 @@ class Token implements \JsonSerializable { const TOKENS = [ self::ANY => 'ANY', self::NONE => 'NONE', - self::MATCH => 'MATCH', self::OPERATOR => 'OPERATOR', self::CLOAKED => 'CLOAKED' ]; diff --git a/src/parsers.php b/src/parsers.php index 5f46634..ec78c50 100644 --- a/src/parsers.php +++ b/src/parsers.php @@ -68,12 +68,13 @@ protected function parser(TokenStream $ts, string $regexp) /*: Result|null*/ return new Ast($this->label, $token); } - return $this->error($ts); + if ($this->errorLevel === Error::ENABLED) + return new Error(new Expected(new Token(T_STRING, "matching '{$regexp}'")), $ts->current(), $ts->last()); } function expected() : Expected { - return new Expected(new Token(Token::MATCH, $this->stack[0])); + return new Expected(new Token(T_STRING)); } function isFallible() : bool diff --git a/tests/ParserTest.php b/tests/ParserTest.php index 6f17b28..4731088 100644 --- a/tests/ParserTest.php +++ b/tests/ParserTest.php @@ -121,7 +121,7 @@ function testRtokenOnError() { $this->parseError( $ts, rtoken('/^T_\w+$/'), - "Unexpected T_STRING(T_) on line 1, expected MATCH(/^T_\w+$/)." + "Unexpected T_STRING(T_) on line 1, expected T_STRING(matching '/^T_\w+$/')." 
); } From 2d6f02927de9d9c2c90249b135f2c358c5d96c4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Fri, 27 Jan 2017 19:42:05 -0400 Subject: [PATCH 02/16] =?UTF-8?q?add=20`=C2=B7=C2=B7unvar`=20to=20expaners?= =?UTF-8?q?=20cc=20@assertchris?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/expanders.php | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/expanders.php b/src/expanders.php index 829311b..5ad69c9 100644 --- a/src/expanders.php +++ b/src/expanders.php @@ -19,6 +19,18 @@ function stringify(TokenStream $ts) : TokenStream { ; } +function unvar(TokenStream $ts) : TokenStream { + $str = preg_replace('/^\$+/', '', (string) $ts); + + return + TokenStream::fromSequence( + new Token( + T_CONSTANT_ENCAPSED_STRING, $str + ) + ) + ; +} + function concat(TokenStream $ts) : TokenStream { $ts->reset(); $buffer = ''; From 3d026a8e9e8b9253cf0799784e274f3f3d47381e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Fri, 27 Jan 2017 19:56:24 -0400 Subject: [PATCH 03/16] =?UTF-8?q?allow=20optional=20ast=20unfold=20with=20?= =?UTF-8?q?`=C2=B7foo=20=3F=C2=B7=C2=B7=C2=B7=20{/*=20expansion=20*/}`=20s?= =?UTF-8?q?yntax?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cc @assertchris --- src/Expansion.php | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/Expansion.php b/src/Expansion.php index 3981722..e5401ca 100644 --- a/src/Expansion.php +++ b/src/Expansion.php @@ -119,6 +119,8 @@ private function compile(array $expansion, Map $context) : TokenStream { ( rtoken('/^·\w+|···\w+$/')->as('label') , + optional(token('?'))->as('optional') + , operator('···') , optional @@ -129,7 +131,8 @@ private function compile(array $expansion, Map $context) : TokenStream { braces()->as('expansion') ) ->onCommit(function(Ast $result) use($cg) { - $this->lookupContext($result->label, $cg->context, 
self::E_UNDEFINED_EXPANSION); + if (null !== $result->optional) + $this->lookupContext($result->label, $cg->context, self::E_UNDEFINED_EXPANSION); $this->constant = false; }) , @@ -198,6 +201,8 @@ private function mutate(TokenStream $ts, Ast $context, Cycle $cycle, Directives ( rtoken('/^·\w+|···\w+$/')->as('label') , + optional(token('?'))->as('optional') + , operator('···') , optional @@ -211,7 +216,12 @@ private function mutate(TokenStream $ts, Ast $context, Cycle $cycle, Directives ->onCommit(function(Ast $result) use($states) { $cg = $states->current(); - $context = $cg->this->lookupContext($result->label, $cg->context, self::E_UNDEFINED_EXPANSION); + if (null !== $result->optional) + $context = $cg->this->lookupContextOptional($result->label, $cg->context); + else + $context = $cg->this->lookupContext($result->label, $cg->context, self::E_UNDEFINED_EXPANSION); + + if ($context === null) return; $expansion = TokenStream::fromSlice($result->expansion); $delimiters = $result->delimiters; @@ -325,4 +335,10 @@ private function lookupContext(Token $token, Context $context, string $error) /* return $result; } + + private function lookupContextOptional(Token $token, Context $context) /*: Token | []Token*/ { + $symbol = (string) $token; + + return $context->get($symbol); + } } From 7e3abbe5459715bb1695cb54f2b70772b9cc5e77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Fri, 27 Jan 2017 19:43:00 -0400 Subject: [PATCH 04/16] add property accessors generic macro cc @assertchris --- tests/phpt/property_accessors.phpt | 133 +++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 tests/phpt/property_accessors.phpt diff --git a/tests/phpt/property_accessors.phpt b/tests/phpt/property_accessors.phpt new file mode 100644 index 0000000..140d5f7 --- /dev/null +++ b/tests/phpt/property_accessors.phpt @@ -0,0 +1,133 @@ +--TEST-- +General property --pretty-print +--FILE-- +> { + ·class { + use \Pre\AccessorsTrait; + + ···body + } +} 
+ +macro { + private T_VARIABLE·variable { + ·repeat + ( + ·either + ( + ·chain + ( + get, + ·optional(·chain(·token(':'), ·ns()·_))·getter_return_type, + ·between(·token('{'), ·layer(), ·token('}'))·getter_body + ) + ·getter + , + ·chain + ( + set, + ·token('('), + ·layer()·setter_args, + ·token(')'), + ·optional(·chain(·token(':'), ·ns()·_))·setter_return_type, + ·between(·token('{'), ·layer(), ·token('}'))·setter_body + ) + ·setter + , + ·chain + ( + unset, + ·optional(·chain(·token(':'), ·ns()·_))·unsetter_return_type, + ·between(·token('{'), ·layer(), ·token('}'))·unsetter_body + ) + ·unsetter + ) + ) + ·accessors + }; +} >> { + private T_VARIABLE·variable; + + ·accessors ··· { + ·setter ?··· { + private function ··concat(__set_ ··unvar(T_VARIABLE·variable))(·setter_args) ·setter_return_type { + ·setter_body + } + + } + + ·getter ?··· { + private function ··concat(__get_ ··unvar(T_VARIABLE·variable))() ·getter_return_type { + ·getter_body + } + } + + ·unsetter ?··· { + private function ··concat(__unset_ ··unvar(T_VARIABLE·variable))() ·unsetter_return_type { + ·unsetter_body + } + } + } +} + +namespace App; + +class Sprocket +{ + private $type { + get :string { + return $this->type; + } + + set(string $value) { + $this->type = $value; + } + + unset { + $this->type = ''; + } + }; + + private $name { + get :string { + return $this->name; + } + }; +} + +?> +--EXPECTF-- +type = $value; + } + private function __get_type() : string + { + return $this->type; + } + private function __unset_type() + { + $this->type = ''; + } + private $name; + private function __get_name() : string + { + return $this->name; + } +} + +?> From 4ed681b404f057a5cf96ca942011a9d123e0beab Mon Sep 17 00:00:00 2001 From: Christopher Pitt Date: Mon, 30 Jan 2017 08:03:43 +1300 Subject: [PATCH 05/16] Updated PHP Parser version --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index df84b28..0288290 100644 --- a/composer.json +++ 
b/composer.json @@ -18,7 +18,7 @@ "php": "7.*", "ext-mbstring": "*", "ext-tokenizer": "*", - "nikic/php-parser": "^2.0" + "nikic/php-parser": "^2.1|^3.0" }, "autoload": { "files": [ From 8a7ea4f276ee6838577dc713f6f7298185661e54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Tue, 31 Jan 2017 13:19:10 -0200 Subject: [PATCH 06/16] remove deprecated composer --dev flag from .travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index e6aac36..3180f27 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ sudo: false before_script: - composer self-update - composer require satooshi/php-coveralls:dev-master --no-update --dev - - composer install --dev --prefer-source + - composer install --prefer-source script: - php vendor/bin/phpunit From b4ff6d51ad8466fbbd8bc467d0e06e95ffcb3408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Tue, 31 Jan 2017 13:20:17 -0200 Subject: [PATCH 07/16] =?UTF-8?q?allow=20Expected=20to=20be=20a=20negation?= =?UTF-8?q?=20+=20add=20tests=20to=20`=C2=B7not()`=20error=20messa?= =?UTF-8?q?ge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Error.php | 15 +-------------- src/Expected.php | 27 ++++++++++++++++++++++++++- src/parsers.php | 2 +- tests/ParserTest.php | 26 ++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 16 deletions(-) diff --git a/src/Error.php b/src/Error.php index 14c40cd..a9a79aa 100644 --- a/src/Error.php +++ b/src/Error.php @@ -61,20 +61,7 @@ function message() : string { $messages = []; foreach ($errors as $prefix => $expected) { - $messages[] = $prefix . sprintf( - self::EXPECTED, - implode( - ' or ', - array_unique( - array_map( - function(Token $t) { - return $t->dump(); - }, - $expected->all() - ) - ) - ) - ); + $messages[] = $prefix . 
sprintf(self::EXPECTED, (string) $expected); } return implode(PHP_EOL, $messages); diff --git a/src/Expected.php b/src/Expected.php index 3a1a268..287e960 100644 --- a/src/Expected.php +++ b/src/Expected.php @@ -4,7 +4,7 @@ class Expected { - protected $tokens; + protected $tokens, $negation = false; function __construct(Token ...$tokens) { $this->tokens = $tokens; @@ -18,8 +18,33 @@ function all() : array { return $this->tokens; } + function negate() : self { + $expected = clone $this; + $expected->negation = true; + + return $expected; + } + + function __toString() : string { + return + ($this->negation ? 'not ' : '') . + implode( + ' or ' . ($this->negation ? 'not ' : ''), + array_unique( + array_map( + function(Token $t) { + return $t->dump(); + }, + $this->all() + ) + ) + ) + ; + } + function raytrace() : string { return + ($this->negation ? 'not ' : '') . implode( ' | ', array_map( diff --git a/src/parsers.php b/src/parsers.php index ec78c50..c1ed4ad 100644 --- a/src/parsers.php +++ b/src/parsers.php @@ -798,7 +798,7 @@ protected function parser(TokenStream $ts, Parser $parser) /*: Result|null*/ function expected() : Expected { - return new Expected; + return $this->stack[0]->expected()->negate(); } function isFallible() : bool diff --git a/tests/ParserTest.php b/tests/ParserTest.php index 4731088..001f4e4 100644 --- a/tests/ParserTest.php +++ b/tests/ParserTest.php @@ -208,6 +208,32 @@ function testOptional() { $this->parseSuccess($ts, optional(token(T_STRING, 'baz')), ""); } + function testNot() { + $ts = TokenStream::fromSource("parseSuccess($ts, token(T_OPEN_TAG), "T_OPEN_TAG(parseSuccess($ts, repeat(chain(not(token(T_STRING, 'null')), token(T_STRING))), "T_STRING(foo), T_STRING(bar)"); + + $ts = TokenStream::fromSource("parseSuccess($ts, token(T_OPEN_TAG), "T_OPEN_TAG(parseSuccess($ts, repeat(chain(not(token(T_STRING, 'null')), token(T_STRING))), "T_STRING(foo), T_STRING(bar)"); + + $ts = TokenStream::fromSource("parseSuccess($ts, token(T_OPEN_TAG), 
"T_OPEN_TAG(parseError( + $ts, + repeat(chain(not(token(T_STRING, 'null')), token(T_STRING))), + "Unexpected T_STRING(null) on line 1, expected not T_STRING(null)." + ); + + $ts = TokenStream::fromSource("parseSuccess($ts, token(T_OPEN_TAG), "T_OPEN_TAG(parseError( + $ts, + repeat(chain(not(either(token(T_STRING, 'null'), token(T_STRING, 'true'), token(T_STRING, 'false'))), token(T_STRING))), + "Unexpected T_STRING(null) on line 1, expected not T_STRING(null) or not T_STRING(true) or not T_STRING(false)." + ); + } + function testRepeat() { $ts = TokenStream::fromSource(" Date: Wed, 22 Feb 2017 23:33:31 -0300 Subject: [PATCH 08/16] rtoken() should expect T_VARIABLE too --- src/parsers.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parsers.php b/src/parsers.php index c1ed4ad..16f554f 100644 --- a/src/parsers.php +++ b/src/parsers.php @@ -74,7 +74,7 @@ protected function parser(TokenStream $ts, string $regexp) /*: Result|null*/ function expected() : Expected { - return new Expected(new Token(T_STRING)); + return new Expected(new Token(T_STRING), new Token(T_VARIABLE)); } function isFallible() : bool From 4e4297207072647c34a0c2d4b3b3c0aa2b4040cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Mon, 3 Oct 2016 03:36:27 -0400 Subject: [PATCH 09/16] refactor `layer()` parser and fixes old known heuristic bugs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The layer() matcher had known bugs regarding matching pair heuristics which caused source like `{([)}]` to match as false positive :P Yea... it was lousy, but it was good enough for the job and the engine would catch the syntax error later anyway. The new behavior is: { ( [ ) ] } ^ syntax error unexpected ')' expected ']' This means that all layer matching will be quite strict from now on. This also makes `layer()` a tad bit slower. 
A better approach could be mapping closing/opening pairs of tokens ahead of time, when building the token stream, and make layer matching very close to O(1). The routine could look like: ``` class TokenStream { ... function mapPairs() { stack = []; foreach_node_on_token_stream(current) { switch (current.token.type) { case '{': case '(': case '[': T_CURLY_OPEN: T_DOLLAR_OPEN_CURLY_BRACES: $stack[] = current; break; case '}': case ')': case ']': if (start = array_pop(stack)){ start.meta['pair'] = current; current.meta['pair'] = start; } else { throw new \Exception('Unbalanced token stream'); } break; } } if (stacklength > 0) throw new \Exception('Unbalanced token stream'); } ... } ``` This would come with a natural way of blocking unbalanced pairs of tokens (which is a good thing) but also with a general performance penalty. Matching a layer would become as easy as: ``` if (current.token.type === '{') { current = current.next; end = current.meta['pair']; layer = []; while (current !== end) { layer[] = current.token; current = current.next; } } ``` But since macros with `{···match}`, `(···match)` or `[···match]` as entry point are the only ones that could significantly benefit from such "ahead of time token pair mapping" optimization - and these kinds of macros are not very likely to happen - let's leave this for later consideration. 
--- src/Parser.php | 4 +- src/parsers.php | 47 +++++++++++++++---- .../macro/layer_matcher_error_unbalanced.phpt | 16 +++++++ .../layer_matcher_error_unbalanced_end.phpt | 16 +++++++ 4 files changed, 71 insertions(+), 12 deletions(-) create mode 100644 tests/phpt/macro/layer_matcher_error_unbalanced.phpt create mode 100644 tests/phpt/macro/layer_matcher_error_unbalanced_end.phpt diff --git a/src/Parser.php b/src/Parser.php index f90aa63..0e19603 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -104,10 +104,10 @@ final function withErrorLevel(bool $errorLevel) : self return $this; } - final protected function error(TokenStream $ts) /*: Error|null*/ + final protected function error(TokenStream $ts, Expected $expected = null) /*: Error|null*/ { if ($this->errorLevel === Error::ENABLED) - return new Error($this->expected(), $ts->current(), $ts->last()); + return new Error($expected ?: $this->expected(), $ts->current(), $ts->last()); } } diff --git a/src/parsers.php b/src/parsers.php index 16f554f..3f95630 100644 --- a/src/parsers.php +++ b/src/parsers.php @@ -329,10 +329,10 @@ function isFallible() : bool T_CURLY_OPEN => 1, T_DOLLAR_OPEN_CURLY_BRACES => 1, '}' => -1, - '[' => 1, - ']' => -1, - '(' => 1, - ')' => -1, + '[' => 2, + ']' => -2, + '(' => 3, + ')' => -3, ]; function layer(array $delimiters = LAYER_DELIMITERS) : Parser @@ -342,15 +342,42 @@ function layer(array $delimiters = LAYER_DELIMITERS) : Parser function parser(TokenStream $ts, array $delimiters) /*: Result|null*/ { $level = 1; + $stack = []; $tokens = []; - while ( - (null !== ($token = $ts->current())) && - ($level += ($delimiters[$token->type()] ?? 0)) - ){ - $tokens[] = $token; - $ts->step(); + $current = $ts->index(); + while (true) { + if ((null === ($token = $current->token))) break; + + $delimiter = $token->type(); + $factor = $delimiters[$delimiter] ?? 
0; + + if ($factor > 0) { + $level++; + $stack[] = $delimiter; + } + else if ($factor < 0) { + $level--; + if ($pair = array_pop($stack)) { + if (($factor + $delimiters[$pair])!== 0) { + $ts->jump($current); + + // reverse enginner delimiters to get the expected closing pair + $expected = array_search(-$delimiters[$pair], $delimiters); + + return $this->error($ts, new Expected(new Token($expected))); + } + } + } + + if ($level > 0) { + $tokens[] = $token; + $current = $current->next; + continue; + } + break; } + $ts->jump($current); return new Ast($this->label, $tokens); } diff --git a/tests/phpt/macro/layer_matcher_error_unbalanced.phpt b/tests/phpt/macro/layer_matcher_error_unbalanced.phpt new file mode 100644 index 0000000..2c26f9c --- /dev/null +++ b/tests/phpt/macro/layer_matcher_error_unbalanced.phpt @@ -0,0 +1,16 @@ +--TEST-- +Layer matcher error with unbalanced token pairs +--FILE-- +> { + MATCH +} + +µ(foo, {bar, [baz}]); // pairs don't match + +?> +--EXPECTF-- +Unexpected '}' on line 9, expected ']'. diff --git a/tests/phpt/macro/layer_matcher_error_unbalanced_end.phpt b/tests/phpt/macro/layer_matcher_error_unbalanced_end.phpt new file mode 100644 index 0000000..1ddb3ab --- /dev/null +++ b/tests/phpt/macro/layer_matcher_error_unbalanced_end.phpt @@ -0,0 +1,16 @@ +--TEST-- +Non delimited layer matching +--FILE-- +> { + MATCH +} + +µ((() // pairs don't match + +?> +--EXPECTF-- +Unexpected end at T_CLOSE_TAG(?>) on line 11, expected ')'. From 2788345e06e0e61c16189ec946e7505f94648441 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Thu, 23 Feb 2017 07:32:13 -0400 Subject: [PATCH 10/16] improves performance, cuts half CPU instructions From benchmark using valgrind+callgrind, zend.assertions=-1; Expanding a 30k LOC file: before: 723071752 after: 328989510 This commit is doing a lot, but here goes a description to avoid a big history rewrite: :lazy_panda: .1 Inline hot TokenStream methods, saves countless method calls. 
.2 Improven token skipability by precomputing conditions within Nodes (reduces method calls) .3 Remove typehints from hot paths, use assertions instead .3 Improve token id generation with (__CLASS__)::$_id++; .4 Optimize token->equals() for most frequent scenario .5 Use token->value instead of (string) $token when converting token stream to string .6 Improve expansion rendering when expansion is a single token .7 Inline Directives::apply(), reduce ksort calls .8 Improve closure, use 'static' closure to prevent `$this` leak optimizations 1, 2 and 3 were the most important! --- src/BlueContext.php | 17 +-- src/Directives.php | 59 ++++++---- src/Expansion.php | 34 ++++-- src/Macro.php | 14 ++- src/Node.php | 8 +- src/NodeEnd.php | 2 +- src/Parser.php | 15 ++- src/Pattern.php | 26 +++-- src/Token.php | 77 ++++++++----- src/TokenStream.php | 226 ++++++++++++++++++++++++++------------ src/expanders.php | 8 +- src/parsers.php | 19 ++-- tests/ParserTest.php | 2 +- tests/TokenStreamTest.php | 19 ++-- tests/TokenTest.php | 41 +++---- yay_parse.php | 26 ++--- 16 files changed, 362 insertions(+), 231 deletions(-) diff --git a/src/BlueContext.php b/src/BlueContext.php index 045fad4..f3409fb 100644 --- a/src/BlueContext.php +++ b/src/BlueContext.php @@ -5,17 +5,18 @@ class BlueContext { private $map = []; - function addDisabledMacros(Token $token, array $macros) { - if (! 
isset($this->map[$token->id()])) $this->map[$token->id()] = []; + function addDisabledMacros($token, $macros) { + assert($token instanceof Token); + assert(\is_array($macros)); - $this->map[$token->id()] += $macros; + foreach ($macros as $id => $_) $this->map[$token->id()][$id] = true; } - function isMacroDisabled(Token $token, Macro $macro): bool { - return isset($this->map[$token->id()][$macro->id()]); - } + function getDisabledMacros($token) { + assert($token instanceof Token); + + if (isset($this->map[$token->id()])) return $this->map[$token->id()]; - function getDisabledMacros(Token $token): array { - return $this->map[$token->id()] ?? []; + return []; } } diff --git a/src/Directives.php b/src/Directives.php index 88c29c2..7fbdb8a 100644 --- a/src/Directives.php +++ b/src/Directives.php @@ -5,34 +5,55 @@ class Directives { protected - $directives = [], - $raytraceLiteral = [], - $raytraceNonliteral = [] + $literalHitMap = [], + $typeHitMap = [] ; function add(Directive $directive) { + $specificity = $directive->pattern()->specificity(); + $identity = $directive->id(); $expectations = $directive->pattern()->expected()->all(); - foreach ($expectations as $expected) - if ($v = (string) $expected) - $this->raytraceLiteral[$v] = true; - else - $this->raytraceNonliteral[$expected->type()] = true; - - $this->directives[$directive->pattern()->specificity()][$directive->id()] = $directive; - krsort($this->directives); + foreach ($expectations as $expected) { + if ($key = (string) $expected) { + $this->literalHitMap[$key][$specificity][$identity] = $directive; + krsort($this->literalHitMap[$key]); + } + else { + $this->typeHitMap[$expected->type()][$specificity][$identity] = $directive; + krsort($this->typeHitMap[$expected->type()]); + } + } } - function apply(TokenStream $ts, Token $t, BlueContext $blueContext) { - if ( - isset($this->raytraceLiteral[(string) $t]) || - isset($this->raytraceNonliteral[$t->type()]) - ) { - foreach ($this->directives as $directives) { - 
foreach ($directives as $directive) { - $directive->apply($ts, $this, $blueContext); + function apply(TokenStream $ts, BlueContext $blueContext) { + $token = $ts->current(); + + while (null !== $token) { + + $tstring = $token->value(); + + // skip when something looks like a new macro to be parsed + if ('macro' === $tstring) break; + + // here attempt to match and expand userland macros + // but just in case at least one macro passes the entry point heuristics + if (isset($this->literalHitMap[$tstring])) { + foreach ($this->literalHitMap[$tstring] as $directives) { + foreach ($directives as $directive) { + $directive->apply($ts, $this, $blueContext); + } } } + else if (isset($this->typeHitMap[$token->type()])) { + foreach ($this->typeHitMap[$token->type()] as $directives) { + foreach ($directives as $directive) { + $directive->apply($ts, $this, $blueContext); + } + } + } + + $token = $ts->next(); } } } diff --git a/src/Expansion.php b/src/Expansion.php index e5401ca..5b31f0f 100644 --- a/src/Expansion.php +++ b/src/Expansion.php @@ -47,7 +47,9 @@ function expand(Ast $crossover, Cycle $cycle, Directives $directives, BlueContex $expansion = clone $this->expansion; if ($this->unsafe) - hygienize($expansion, ['scope' => $cycle->id(),]); + hygienize($expansion, Map::fromKeysAndValues(['scope' => $cycle->id(),])); + + if ($this->constant) return $expansion; return $this->mutate($expansion, $crossover, $cycle, $directives, $blueContext); } @@ -156,8 +158,6 @@ private function compile(array $expansion, Map $context) : TokenStream { private function mutate(TokenStream $ts, Ast $context, Cycle $cycle, Directives $directives, BlueContext $blueContext) : TokenStream { - if ($this->constant) return $ts; - static $states, $parser; $states = $states ?? 
new Stack; @@ -168,6 +168,23 @@ private function mutate(TokenStream $ts, Ast $context, Cycle $cycle, Directives ( token(Token::CLOAKED) , + rtoken('/^T_\w+·\w+$/')->as('label')->onCommit(function(Ast $result) use ($states) { + $cg = $states->current(); + $ts = $cg->ts; + + $token = $cg->this->lookupContext($result->label, $cg->context, self::E_UNDEFINED_EXPANSION); + + $ts->previous(); + $node = $ts->index(); + + if ($token instanceof Token) + $node->token = $token; + else + $ts->extract($node, $node->next); + + $ts->next(); + }) + , consume ( chain @@ -190,7 +207,7 @@ private function mutate(TokenStream $ts, Ast $context, Cycle $cycle, Directives 'blueContext' => $cg->blueContext ]); $expansion = TokenStream::fromSlice($result->args); - $mutation = $cg->this->mutate(clone $expansion, $cg->context, $cg->cycle, $cg->directives, $cg->blueContext); + $mutation = $cg->this->mutate($expansion, $cg->context, $cg->cycle, $cg->directives, $cg->blueContext); $mutation = $cg->this->lookupExpander($expander)($mutation, $context); $cg->ts->inject($mutation); }) @@ -223,15 +240,15 @@ private function mutate(TokenStream $ts, Ast $context, Cycle $cycle, Directives if ($context === null) return; - $expansion = TokenStream::fromSlice($result->expansion); $delimiters = $result->delimiters; // normalize single context if (array_values($context) !== $context) $context = [$context]; foreach (array_reverse($context) as $i => $subContext) { + $expansion = TokenStream::fromSlice($result->expansion); $mutation = $cg->this->mutate( - clone $expansion, + $expansion, (new Ast(null, $subContext))->withParent($cg->context), $cg->cycle, $cg->directives, @@ -244,7 +261,7 @@ private function mutate(TokenStream $ts, Ast $context, Cycle $cycle, Directives , consume ( - rtoken('/^(T_\w+·\w+|·\w+|···\w+)$/')->as('label') + rtoken('/^·\w+|···\w+$/')->as('label') ) ->onCommit(function(Ast $result) use ($states) { $cg = $states->current(); @@ -254,7 +271,7 @@ private function mutate(TokenStream $ts, Ast 
$context, Cycle $cycle, Directives if ($context instanceof Token) { $cg->ts->inject(TokenStream::fromSequence($context)); } - elseif (is_array($context) && \count($context)) { + elseif (\is_array($context) && \count($context)) { $tokens = []; array_walk_recursive( $context, @@ -308,7 +325,6 @@ function(Token $token) use(&$tokens) { return $cg->ts; } - private function lookupExpander(Token $token) : string { $identifier = (string) $token; $expander = '\Yay\Dsl\Expanders\\' . explode('··', $identifier)[1]; diff --git a/src/Macro.php b/src/Macro.php index fde73bd..c90a5c2 100644 --- a/src/Macro.php +++ b/src/Macro.php @@ -17,10 +17,10 @@ class Macro implements Directive { $id ; - function __construct(Map $tags, Pattern $pattern, Expansion $expansion, Cycle $cycle) { - static $id = 0; + protected static $_id = 0; - $this->id = $id++; + function __construct(Map $tags, Pattern $pattern, Expansion $expansion, Cycle $cycle) { + $this->id = (__CLASS__)::$_id++; $this->tags = $tags; $this->pattern = $pattern; $this->expansion = $expansion; @@ -63,7 +63,7 @@ function apply(TokenStream $ts, Directives $directives, BlueContext $blueContext return; } - $ts->unskip(...TokenStream::SKIPPABLE); + $ts->unskip(); $to = $ts->index(); $ts->extract($from, $to); @@ -81,8 +81,10 @@ function apply(TokenStream $ts, Directives $directives, BlueContext $blueContext $ts->inject($expansion); } else { - $ts->unskip(...TokenStream::SKIPPABLE); - $ts->skip(T_WHITESPACE); + $ts->unskip(); + while (null !== ($token = $ts->current()) && $token->is(T_WHITESPACE)) { + $ts->step(); + } $to = $ts->index(); $ts->extract($from, $to); } diff --git a/src/Node.php b/src/Node.php index 4fd14d5..99ecc17 100644 --- a/src/Node.php +++ b/src/Node.php @@ -4,9 +4,13 @@ class Node implements Index { - public $token, $next, $previous; + public $token, $next, $previous, $skippable; - function __construct(Token $token) { $this->token = $token; } + function __construct($token) { + assert($token instanceof Token); + 
$this->token = $token; + $this->skippable = $token->isSkippable(); // cache skipability + } function __debugInfo() { return [$this->token]; } } diff --git a/src/NodeEnd.php b/src/NodeEnd.php index 248ea71..c14710d 100644 --- a/src/NodeEnd.php +++ b/src/NodeEnd.php @@ -4,7 +4,7 @@ class NodeEnd implements Index { - public $token, $previous; + public $token, $previous, $skippable = false; private $next; diff --git a/src/Parser.php b/src/Parser.php index 0e19603..5371a0f 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -70,9 +70,9 @@ function parse(TokenStream $ts) /*: Result|null*/ return $result; } - final function as(/*string|null*/ $label) : self + final function as($label) : self { - if (null !== $label) { + if ('' !== (string) $label) { if(false !== strpos($label, ' ')) throw new InvalidArgumentException( "Parser label cannot contain spaces, '{$label}' given."); @@ -94,10 +94,13 @@ final function withErrorLevel(bool $errorLevel) : self { if ($this->errorLevel !== $errorLevel) { $this->errorLevel = $errorLevel; - foreach ($this->stack as $substack) { - if ($substack instanceof self) { - $substack->{__FUNCTION__}($this->errorLevel); - } + + if ($this->stack) { + array_walk_recursive($this->stack, function($substack){ + if ($substack instanceof self) { + $substack->withErrorLevel($this->errorLevel); + } + }); } } diff --git a/src/Pattern.php b/src/Pattern.php index 594b8fb..20ce978 100644 --- a/src/Pattern.php +++ b/src/Pattern.php @@ -157,12 +157,12 @@ private function compile(array $tokens) { ) ->parse($cg->ts); + $this->specificity = \count($cg->parsers); + // check if macro dominance '·' is last token - if ($this->dominance === \count($cg->parsers)) + if ($this->dominance === $this->specificity) $this->fail(self::E_BAD_DOMINANCE, $this->dominance, $cg->ts->last()->line()); - $this->specificity = \count($cg->parsers); - if ($this->specificity > 1) { if (0 === $this->dominance) { $pattern = chain(...$cg->parsers); @@ -216,10 +216,10 @@ private function 
lookupTokenType(Token $token) : int { return constant($type); } - protected function lookupCapture(Token $token) /*: string|null*/ { + protected function lookupCapture(Token $token) : string { $id = (string) $token; - if ($id === '·_') return null; + if ($id === '·_') return ''; if ($this->scope->contains($id)) $this->fail(self::E_IDENTIFIER_REDEFINITION, $id, $token->line()); @@ -275,9 +275,7 @@ private function compileParserArgs(array $args) : array { $compiled[] = $this->lookupTokenType($arg); break; case 'function': // function(...){...} - $arglist = implode('', $arg['args']); - $body = implode('', $arg['body']); - $compiled[] = eval("return function({$arglist}){ {$body} };"); + $compiled[] = $this->compileAnonymousFunctionArg($arg); break; default: $compiled = array_merge( @@ -286,4 +284,16 @@ private function compileParserArgs(array $args) : array { return $compiled; } + + private function compileAnonymousFunctionArg(array $arg) : \Closure { + $arglist = implode('', $arg['args']); + $body = implode('', $arg['body']); + $source = " true, + T_COMMENT => true, + T_DOC_COMMENT => true, + ]; + /** * pseudo token types */ @@ -28,38 +34,36 @@ class Token implements \JsonSerializable { $type, $value, $line, - $literal = false + $skippable = false ; private $id ; - function __construct($type, string $value = null, int $line = null) { - static $id = 0; + protected static $_id = 0; - $this->id = $id++; + function __construct($type, $value = null, $line = null) { assert(null === $this->type, "Attempt to modify immutable token."); - assert(\is_int($type) || \is_string($type), "Token type must be int or string."); + $this->id = (__CLASS__)::$_id++; if (\is_string($type)) { - if(1 !== \mb_strlen($type)) - throw new YayException("Invalid token type '{$type}'"); - - $this->literal = true; - $value = $type; + $this->value = $type; + } + else { + $this->skippable = isset((__CLASS__)::SKIPPABLE[$type]); + $this->value = $value; } $this->type = $type; - $this->value = $value; 
$this->line = $line; - } - private function __clone() {} + assert($this->check()); + } - function __toString(): string { + function __toString() { return (string) $this->value; } @@ -70,30 +74,29 @@ function __debugInfo() { function dump(): string { $name = $this->name(); - return $this->literal ? "'{$name}'" : "{$name}({$this->value})"; - } - - function is(/* string|int */ ...$types): bool { - return \in_array($this->type, $types); + return $this->type === $this->value ? "'{$name}'" : "{$name}({$this->value})"; } - function contains($value): bool { - return $value === null ?: $this->value === $value; + function is($type) { + return $this->type === $type; } - function equals(self $token): bool { + function equals(self $token) { return - // inlined $this->is() ($this->type === $token->type && - // inlined $this->contains() - ($token->value === null ?: $this->value === $token->value)); + ($this->value === $token->value ?: + ($token->value === null ?: $this->value === null))); + } + + function isSkippable() { + return $this->skippable; } function name(): string { return - ($this->literal) + ($this->type === $this->value) ? $this->type - : self::TOKENS[$this->type] ?? \token_name($this->type) + : (__CLASS__)::TOKENS[$this->type] ?? 
\token_name($this->type) ; } @@ -101,15 +104,29 @@ function type() /* : string|int */ { return $this->type; } - function line(): int { + function value() { + return $this->value; + } + + function line() { return $this->line; } - function id(): int { + function id() { return $this->id; } function jsonSerialize() { - return $this->__toString(); + return (string) $this; + } + + private function check() { + assert(\is_int($this->id)); + assert(\is_bool($this->skippable)); + assert(\is_int($this->type) || (\is_string($this->type) && \strlen($this->type) === 1), "Token type must be int or string[0]."); + assert(\is_string($this->value) || (\is_null($this->value)), "Token value must be string or null."); + assert(\is_int($this->line) || (\is_null($this->line)), "Token line must be int or null."); + + return true; } } diff --git a/src/TokenStream.php b/src/TokenStream.php index 10ddf87..6f37211 100644 --- a/src/TokenStream.php +++ b/src/TokenStream.php @@ -8,10 +8,6 @@ class TokenStream { - const - SKIPPABLE = [T_WHITESPACE, T_COMMENT, T_DOC_COMMENT] - ; - protected $first, $current, @@ -39,68 +35,99 @@ function toSource(NodeStart $node) : string { } function __clone() { - $tokens = []; - $node = $this->first->next; - while ($node instanceof Node) { - $tokens[] = $node->token; - $node = $node->next; + $first = new NodeStart; + $last = new NodeEnd; + + $first->next = $last; + $last->previous = $first; + + $current = $first; + $old = $this->first->next; + while ($old instanceof Node) { + $node = new Node($old->token); + $current->next = $node; + $node->previous = $current; + $current = $node; + $old = $old->next; } - $ts = self::fromSlice($tokens); - $this->first = $ts->first; - $this->last = $ts->last; - $this->reset(); + $current->next = $last; + $last->previous = $current; + + $this->first = $first; + $this->last = $last; + $this->current = $this->first->next; } function index() /* : Node|null */ { return $this->current; } - function jump(Index $index) /* : void */ { 
$this->current = $index; } + function jump($index) /* : void */ { + assert($index instanceof Index); - function reset() /* : void */ { $this->jump($this->first->next); } + if ($index instanceof NodeStart) + $this->current = $this->first->next; + else + $this->current = $index; + } - function current() /* : Token|null */ { - if ($this->current instanceof NodeStart) $this->reset(); + function reset() /* : void */ { + $this->current = $this->first->next; + } + function current() /* : Token|null */ { return $this->current->token; } function step() /* : Token|null */ { - if (!($this->current instanceof NodeEnd)) $this->current = $this->current->next; + $this->current = $this->current->next; - return $this->current(); + return $this->current->token; } function back() /* : Token|null */ { - if (!($this->current instanceof NodeStart)) $this->current = $this->current->previous; + $this->current = $this->current->previous; - return $this->current(); + return $this->current->token; } - function skip(int ...$types) /* : Token|null */ { - while (null !== ($t = $this->current()) && $t->is(...$types)) $this->step(); + function skip() /* : Token|null */ { + while ($this->current->skippable) { + $this->current = $this->current->next; + } - return $this->current(); + return $this->current->token; } - function unskip(int ...$types) /* : Token|null */ { - while (null !== ($t = $this->back()) && $t->is(...$types)); - $this->step(); + function unskip() /* : Token|null */ { + $this->current = $this->current->previous; + + while ($this->current->skippable) { + $this->current = $this->current->previous; + } + + $this->current = $this->current->next; - return $this->current(); + return $this->current->token; } function next() /* : Token|null */ { - $this->step(); - $this->skip(...self::SKIPPABLE); + $this->current = $this->current->next; + + while ($this->current->skippable) { + $this->current = $this->current->next; + } - return $this->current(); + return $this->current->token; } function 
previous() /* : Token|null */ { - $this->unskip(...self::SKIPPABLE); - $this->back(); + $this->current = $this->current->previous; + + while ($this->current->skippable) { + $this->current = $this->current->previous; + } - return $this->current(); + return $this->current->token; } function last() : Token { @@ -112,9 +139,15 @@ function first() : Token { } function trim() { - while (null !== ($t = $this->first->next->token) && $t->is(T_WHITESPACE)) $this->shift(); - while (null !== ($t = $this->last->previous->token) && $t->is(T_WHITESPACE)) $this->pop(); - $this->reset(); + while (null !== ($t = $this->first->next->token) && $t->is(T_WHITESPACE)) { + $this->first->next = $this->first->next->next; + $this->first->next->previous = $this->first; + } + while (null !== ($t = $this->last->previous->token) && $t->is(T_WHITESPACE)) { + $this->last->previous = $this->last->previous->previous; + $this->last->previous->next = $this->last; + } + $this->current = $this->first->next; } function shift() { @@ -127,36 +160,52 @@ function pop() { $this->last->previous->next = $this->last; } - function extract(Index $from, Index $to) { + function extract($from, $to) { + assert($from instanceof Index); + assert($to instanceof Index); + assert($from !== $to); assert(! $this->isEmpty()); $from = $from->previous; - self::link($from, $to); - $this->jump($from); + $from->next = $to; + $to->previous = $from; + + $this->current = $from; } - function inject(self $ts) { - if ($ts->isEmpty()) return; + function inject($ts) { + assert($ts instanceof self); + + if (($ts->first->next instanceof NodeEnd) + && ($ts->last->previous instanceof NodeStart)) return; - if ($this->current instanceof NodeEnd) $this->jump($this->last->previous); + if ($this->current instanceof NodeEnd) $this->current = $this->last->previous; - $a = $this->isEmpty() ? $this->first : $this->current; + $a = $this->current; $b = $ts->first->next; $e = $ts->last->previous; - $f = $this->isEmpty() ? 
$this->last : $this->current->next; + $f = $this->current->next; - self::link($a, $b); - self::link($e, $f); + $a->next = $b; + $b->previous = $a; + + $e->next = $f; + $f->previous = $e; } - function push(Token $token) { + function push($token) { + assert($token instanceof Token); + $a = $this->last->previous; $b = new Node($token); $c = $this->last; - self::link($a, $b); - self::link($b, $c); + $a->next = $b; + $b->previous = $a; + + $b->next = $c; + $c->previous = $b; } function isEmpty() : bool { @@ -166,49 +215,82 @@ function isEmpty() : bool { ; } - static function fromSourceWithoutOpenTag(string $source) : self { $ts = self::fromSource('shift(); + $ts->first->next = $ts->first->next->next; + $ts->first->next->previous = $ts->first; return $ts; } static function fromSource(string $source) : self { + $tokens = \token_get_all($source); + + $ts = new self; + $first = new NodeStart; + $last = new NodeEnd; + + $first->next = $last; + $last->previous = $first; + $line = 0; - $tokens = token_get_all($source); + $current = $first; + foreach ($tokens as $t){ + if (\is_array($t)) { + $line = $t[2]; + $token = new Token(...$t); + } + else { + $token = new Token($t, $t, $line); + } - foreach ($tokens as $i => $token) // normalize line numbers - if (is_array($token)) - $line = $token[2]; - else - $tokens[$i] = [$token, $token, $line]; + $node = new Node($token); + $current->next = $node; + $node->previous = $current; - return self::fromSequence(...$tokens); - } + $current = $node; + } - static function fromSequence(...$tokens) : self { - foreach ($tokens as $i => $t) - $tokens[$i] = ($t instanceof Token) ? 
$t : new Token(...$t); + $current->next = $last; + $last->previous = $current; - return self::fromSlice($tokens); + $ts->first = $first; + $ts->last = $last; + + $ts->current = $ts->first->next; + + return $ts; } static function fromSlice(array $tokens) : self { $ts = new self; - $ts->first = new NodeStart; - $ts->last = new NodeEnd; - self::link($ts->first, $ts->last); + $first = new NodeStart; + $last = new NodeEnd; + + $first->next = $last; + $last->previous = $first; - foreach ($tokens as $token) $ts->push($token); + $current = $first; + foreach ($tokens as $token){ + $node = new Node($token); + $current->next = $node; + $node->previous = $current; - $ts->reset(); + $current = $node; + } + + $current->next = $last; + $last->previous = $current; + + $ts->first = $first; + $ts->last = $last; + + $ts->current = $ts->first->next; return $ts; } - private static function link($a, $b) { - $a->next = $b; - $b->previous = $a; + static function fromSequence(Token ...$tokens) : self { + return self::fromSlice($tokens); } } diff --git a/src/expanders.php b/src/expanders.php index 5ad69c9..9a917e6 100644 --- a/src/expanders.php +++ b/src/expanders.php @@ -47,12 +47,12 @@ function concat(TokenStream $ts) : TokenStream { return TokenStream::fromSequence(new Token(T_STRING, $buffer)); } -function hygienize(TokenStream $ts, array $context) : TokenStream { +function hygienize(TokenStream $ts, Context $context) : TokenStream { $ts->reset(); $cg = (object)[ 'node' => null, - 'context' => $context, + 'scope' => $context->get('scope'), 'ts' => $ts ]; @@ -79,7 +79,9 @@ function hygienize(TokenStream $ts, array $context) : TokenStream { ) ->onCommit(function(Ast $result) use ($cg) { if (($t = $cg->node->token) && (($value = (string) $t) !== '$this')) - $cg->node->token = new Token($t->type(), "{$value}·{$cg->context['scope']}", $t->line()); + $cg->node->token = new Token($t->type(), "{$value}·{$cg->scope}", $t->line()); + + $cg->node = null; }) ) ->parse($ts); diff --git 
a/src/parsers.php b/src/parsers.php index 3f95630..93311cd 100644 --- a/src/parsers.php +++ b/src/parsers.php @@ -20,7 +20,6 @@ function token($type, $value = null) : Parser function __construct($type, Token $token) { $this->type = $type; - $this->stack = [$token]; $this->token = $token; $this->expected = new Expected($token); } @@ -62,7 +61,7 @@ protected function parser(TokenStream $ts, string $regexp) /*: Result|null*/ { $token = $ts->current(); - if (null !== $token && 1 === preg_match($regexp, (string) $token)) { + if (null !== $token && 1 === preg_match($regexp, $token->value())) { $ts->next(); return new Ast($this->label, $token); @@ -173,11 +172,11 @@ protected function parser(TokenStream $ts, string $operator) /*: Result|null*/ while ( (mb_strlen($buffer) <= $max) && (null !== ($token = $ts->current())) && - (false !== mb_strstr($operator, ($current = (string) $token))) + (false !== mb_strstr($operator, ($current = $token->value()))) ){ $ts->step(); if(($buffer .= $current) === $operator) { - $ts->skip(...TokenStream::SKIPPABLE); + $ts->skip(); return new Ast($this->label, new Token(Token::OPERATOR, $buffer)); } } @@ -229,7 +228,7 @@ function traverse(Parser ...$parsers) : Parser return new class(__FUNCTION__, $parser) extends Parser { - protected function parser(TokenStream $ts, Parser $parser) /*: Result|null*/ + protected function parser(TokenStream $ts, Parser $parser) : Ast { while ($parser->parse($ts) instanceof Ast); @@ -261,7 +260,7 @@ protected function parser(TokenStream $ts, Parser $parser) /*: Result|null*/ $ast = new Ast($this->label); while( - ($current = $ts->current()) && + (null !== $ts->current()) && (($partial = $parser->parse($ts)) instanceof Ast) ){ $ast->append($partial); @@ -551,8 +550,12 @@ protected function parser(TokenStream $ts, Parser $parser, int $trim) /*: Result $from = $ts->index(); $ast = $parser->parse($ts); if ($ast instanceof Ast) { - $ts->unskip(...TokenStream::SKIPPABLE); - if ($trim & CONSUME_DO_TRIM) 
$ts->skip(T_WHITESPACE); + $ts->unskip(); + if ($trim & CONSUME_DO_TRIM) { + while (null !== ($token = $ts->current()) && $token->is(T_WHITESPACE)) { + $ts->step(); + } + } $to = $ts->index(); $ts->extract($from, $to); diff --git a/tests/ParserTest.php b/tests/ParserTest.php index 001f4e4..8e9a6e5 100644 --- a/tests/ParserTest.php +++ b/tests/ParserTest.php @@ -35,7 +35,7 @@ protected function parseError(TokenStream $ts, Parser $parser, $msg) { $current = $ts->current(); $result = $parser->onCommit( function($commit) use($parser) { - $this->fail("Unexpected commit on {$parser->type()}()."); + $this->fail("Unexpected commit on {$parser}()."); } ) ->withErrorLevel(Error::ENABLED) diff --git a/tests/TokenStreamTest.php b/tests/TokenStreamTest.php index 744a753..eaca1fc 100644 --- a/tests/TokenStreamTest.php +++ b/tests/TokenStreamTest.php @@ -41,8 +41,6 @@ function testStep() { $this->assertSame('F', (string) $ts->step()); $this->assertSame(null, $ts->step()); $this->assertSame(null, $ts->current()); - $this->assertSame(null, $ts->step()); - $this->assertSame(null, $ts->current()); } function testBack() { @@ -55,6 +53,8 @@ function testBack() { $this->assertSame(' ', (string) $ts->back()); $this->assertSame('A', (string) $ts->back()); $this->assertSame('back()); + $this->assertSame(null, $ts->back()); + $this->assertSame(null, $ts->current()); } @@ -70,8 +70,6 @@ function testNext() { $this->assertSame('6', (string) $ts->next()); $this->assertSame(null, $ts->next()); $this->assertSame(null, $ts->current()); - $this->assertSame(null, $ts->next()); - $this->assertSame(null, $ts->current()); } function testReset() { @@ -129,13 +127,12 @@ function testLoop(string $src) { function testClone() { $tsa = TokenStream::fromSource('reset(); $this->assertNotSame($tsa->index(), $tsb->index()); } function testExtract() { $ts = TokenStream::fromSequence( - [T_STRING, 'T_VARIABLE·A', 0], [T_WHITESPACE, ' ', 0], [T_STRING, 'T_VARIABLE·B', 0]); + new Token(T_STRING, 'T_VARIABLE·A', 0), 
new Token(T_WHITESPACE, ' ', 0), new Token(T_STRING, 'T_VARIABLE·B', 0)); $ts->extract($ts->index(), $ts->index()->next); @@ -147,18 +144,18 @@ function testInject() { $ts->next(); $ts->step(); $ts->inject(TokenStream::fromSequence( - [T_STRING, 'MIDDLE_B', 0], [T_WHITESPACE, ' ', 0])); + new Token(T_STRING, 'MIDDLE_B', 0), new Token(T_WHITESPACE, ' ', 0))); $ts->inject(TokenStream::fromSequence( - [T_STRING, 'MIDDLE_A', 0], [T_WHITESPACE, ' ', 0])); + new Token(T_STRING, 'MIDDLE_A', 0),new Token( T_WHITESPACE, ' ', 0))); $this->assertEquals('inject(TokenStream::fromSequence( - [T_WHITESPACE, ' ', 0], [T_STRING, 'BAR', 0], [T_WHITESPACE, ' ', 0])); + new Token(T_WHITESPACE, ' ', 0), new Token(T_STRING, 'BAR', 0), new Token(T_WHITESPACE, ' ', 0))); $this->assertEquals(' BAR ', (string) $ts); $ts->inject(TokenStream::fromSequence( - [T_WHITESPACE, ' ', 0], [T_STRING, 'FOO', 0], [T_WHITESPACE, ' ', 0])); + new Token(T_WHITESPACE, ' ', 0), new Token(T_STRING, 'FOO', 0), new Token(T_WHITESPACE, ' ', 0))); $this->assertEquals(' FOO BAR ', (string) $ts); $ts = TokenStream::fromSource('next(); $node = $ts->index(); $partial = TokenStream::fromSequence( - [T_WHITESPACE, ' ', 0], [T_STRING, 'C', 0], [T_WHITESPACE, ' ', 0]); + new Token(T_WHITESPACE, ' ', 0), new Token(T_STRING, 'C', 0), new Token(T_WHITESPACE, ' ', 0)); $index = $partial->index(); $ts->inject($partial); $this->assertEquals('assertTrue($token->is('$')); - $this->assertTrue($token->is('$', ':', '!')); - $this->assertTrue($token->is('!', '?', ':', '$')); - $this->assertFalse($token->is('!', '?', ':')); - $this->assertFalse($token->is('$!')); + $this->assertFalse($token->is('!')); $token = new Token(T_STRING); - $this->assertFalse($token->is(T_OPEN_TAG, T_YIELD)); + $this->assertFalse($token->is(T_OPEN_TAG)); $this->assertTrue($token->is(T_STRING)); $token = new Token(T_STRING, '"value"', null); - - $this->assertFalse($token->is(T_OPEN_TAG, T_YIELD)); - $this->assertTrue($token->is(T_CLOSE_TAG, T_STRING)); - } - 
- function testContains() { - $token = new Token('$'); - $this->assertTrue($token->contains('$')); - $this->assertFalse($token->contains('!')); - - $token = new Token(T_STRING); - $this->assertTrue($token->contains(null)); - $this->assertFalse($token->contains('')); - - $token = new Token(T_STRING, '"yep"', null); - $this->assertTrue($token->contains(null)); - $this->assertFalse($token->contains('"nope"')); - $this->assertTrue($token->contains('"yep"')); + $this->assertFalse($token->is(T_OPEN_TAG)); + $this->assertTrue($token->is(T_STRING)); } - } diff --git a/yay_parse.php b/yay_parse.php index 3db5010..38276d2 100644 --- a/yay_parse.php +++ b/yay_parse.php @@ -1,6 +1,6 @@ add($d); + $cg = (object) [ 'ts' => TokenStream::fromSource($source), 'directives' => $directives, @@ -28,27 +32,11 @@ function yay_parse(string $source, Directives $directives = null, BlueContext $b 'blueContext' => $blueContext, ]; - foreach($cg->globalDirectives as $d) $cg->directives->add($d); - traverse ( // this midrule is where the preprocessor really does the job! - midrule(function(TokenStream $ts) use ($directives, $blueContext) { - $token = $ts->current(); - - tail_call: { - if (null === $token) return; - - // skip when something looks like a new macro to be parsed - if ('macro' === (string) $token) return; - - // here we do the 'magic' to match and expand userland macros - $directives->apply($ts, $token, $blueContext); - - $token = $ts->next(); - - goto tail_call; - } + midrule(function(TokenStream $ts) use ($cg) { + $cg->directives->apply($ts, $cg->blueContext); }) , // here we parse, compile and allocate new macros From 1463a711dcc21213594b5b95c6ac7d1eabd020fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Fri, 24 Feb 2017 08:43:22 -0400 Subject: [PATCH 11/16] centers engine into a single class instead of a function API The movable parts are still available, but this is easier for users to setup the preprocessor. 
This bc breaks anything relying on yay_parse() and yay_pretty(). Instead of `yay_parse()` the following should be used instead: ``` $engine = new \Yay\Engine; $output = $engine->expand($source); ``` cc @assertchris --- bin/yay | 48 ++++++----- bin/yay-pretty | 9 +- bootstrap.php | 3 - composer.json | 5 +- phpunit.xml | 1 - src/Directive.php | 2 +- src/Directives.php | 59 ------------- src/Engine.php | 166 ++++++++++++++++++++++++++++++++++++ src/Expansion.php | 27 ++---- src/Macro.php | 30 +++---- src/StreamWrapper.php | 10 ++- src/expanders.php | 14 +-- tests/MacroScopeTest.php | 4 +- tests/SpecsTest.php | 17 +++- tests/StreamWrapperTest.php | 4 +- yay_parse.php | 105 ----------------------- yay_pretty.php | 11 --- 17 files changed, 260 insertions(+), 255 deletions(-) delete mode 100644 bootstrap.php delete mode 100644 src/Directives.php create mode 100644 src/Engine.php delete mode 100644 yay_parse.php delete mode 100644 yay_pretty.php diff --git a/bin/yay b/bin/yay index 12ed307..fac326f 100755 --- a/bin/yay +++ b/bin/yay @@ -1,34 +1,40 @@ #!/usr/bin/env php expand($source, $file); - if ($source === false) - throw new InvalidArgumentException("File '{$file}' not found'."); + gc_enable(); - file_put_contents('php://stdout', yay_parse($source)); + file_put_contents('php://stdout', $expansion); } catch (Exception $e) { file_put_contents('php://stderr', $e . PHP_EOL); diff --git a/bin/yay-pretty b/bin/yay-pretty index 439390d..42d3d6e 100755 --- a/bin/yay-pretty +++ b/bin/yay-pretty @@ -1,6 +1,8 @@ #!/usr/bin/env php create(ParserFactory::PREFER_PHP7); + $prettyPrinter = new PrettyPrinter\Standard; + $stmts = $parser->parse($source); + $output = $prettyPrinter->prettyPrintFile($stmts); + + file_put_contents('php://stdout', $output); } catch (Exception $e) { file_put_contents('php://stderr', $e .
PHP_EOL); diff --git a/bootstrap.php b/bootstrap.php deleted file mode 100644 index 29bd528..0000000 --- a/bootstrap.php +++ /dev/null @@ -1,3 +0,0 @@ - src/ - yay_parse.php diff --git a/src/Directive.php b/src/Directive.php index f99689f..902d6ba 100644 --- a/src/Directive.php +++ b/src/Directive.php @@ -12,5 +12,5 @@ function pattern() : Pattern; function expansion() : Expansion; - function apply(TokenStream $TokenStream, Directives $directives, BlueContext $blueContext); + function apply(TokenStream $TokenStream, Engine $engine); } diff --git a/src/Directives.php b/src/Directives.php deleted file mode 100644 index 7fbdb8a..0000000 --- a/src/Directives.php +++ /dev/null @@ -1,59 +0,0 @@ -pattern()->specificity(); - $identity = $directive->id(); - $expectations = $directive->pattern()->expected()->all(); - - foreach ($expectations as $expected) { - if ($key = (string) $expected) { - $this->literalHitMap[$key][$specificity][$identity] = $directive; - krsort($this->literalHitMap[$key]); - } - else { - $this->typeHitMap[$expected->type()][$specificity][$identity] = $directive; - krsort($this->typeHitMap[$expected->type()]); - } - } - } - - function apply(TokenStream $ts, BlueContext $blueContext) { - $token = $ts->current(); - - while (null !== $token) { - - $tstring = $token->value(); - - // skip when something looks like a new macro to be parsed - if ('macro' === $tstring) break; - - // here attempt to match and expand userland macros - // but just in case at least one macro passes the entry point heuristics - if (isset($this->literalHitMap[$tstring])) { - foreach ($this->literalHitMap[$tstring] as $directives) { - foreach ($directives as $directive) { - $directive->apply($ts, $this, $blueContext); - } - } - } - else if (isset($this->typeHitMap[$token->type()])) { - foreach ($this->typeHitMap[$token->type()] as $directives) { - foreach ($directives as $directive) { - $directive->apply($ts, $this, $blueContext); - } - } - } - - $token = $ts->next(); - } - } -} diff 
--git a/src/Engine.php b/src/Engine.php new file mode 100644 index 0000000..96773fa --- /dev/null +++ b/src/Engine.php @@ -0,0 +1,166 @@ +cycle = new Cycle; + $this->blueContext = new BlueContext; + + $this->parser = + traverse + ( + // this midrule is where the preprocessor does the expansion! + midrule(function(TokenStream $ts) { + $token = $ts->current(); + + while (null !== $token) { + + $tstring = $token->value(); + + // skip when something looks like a new macro to be parsed + if ('macro' === $tstring) break; + + // here attempt to match and expand userland macros + // but just in case at least one macro passes the entry point heuristics + if (isset($this->literalHitMap[$tstring])) { + foreach ($this->literalHitMap[$tstring] as $directives) { + foreach ($directives as $directive) { + $directive->apply($ts, $this); + } + } + } + else if (isset($this->typeHitMap[$token->type()])) { + foreach ($this->typeHitMap[$token->type()] as $directives) { + foreach ($directives as $directive) { + $directive->apply($ts, $this); + } + } + } + + $token = $ts->next(); + } + }) + , + // here we parse, compile and allocate new macros + consume + ( + chain + ( + token(T_STRING, 'macro')->as('declaration') + , + optional + ( + repeat + ( + rtoken('/^·\w+$/') + ) + ) + ->as('tags') + , + lookahead + ( + token('{') + ) + , + commit + ( + chain + ( + braces()->as('pattern') + , + operator('>>') + , + braces()->as('expansion') + ) + ) + ->as('body') + , + optional + ( + token(';') + ) + ) + , + CONSUME_DO_TRIM + ) + ->onCommit(function(Ast $macroAst) { + $scope = Map::fromEmpty(); + $tags = Map::fromValues(array_map('strval', $macroAst->{'tags'})); + $pattern = new Pattern($macroAst->{'declaration'}->line(), $macroAst->{'body pattern'}, $tags, $scope); + $expansion = new Expansion($macroAst->{'body expansion'}, $tags, $scope); + $macro = new Macro($tags, $pattern, $expansion); + + $this->registerDirective($macro); + }) + ) + ; + } + + function registerDirective(Directive $directive) { 
+ $specificity = $directive->pattern()->specificity(); + $identity = $directive->id(); + $expectations = $directive->pattern()->expected()->all(); + + foreach ($expectations as $expected) { + if ($key = (string) $expected) { + $this->literalHitMap[$key][$specificity][$identity] = $directive; + krsort($this->literalHitMap[$key]); + } + else { + $this->typeHitMap[$expected->type()][$specificity][$identity] = $directive; + krsort($this->typeHitMap[$expected->type()]); + } + } + + if ($directive->tags()->contains('·global')) $this->globalDirectives[] = $directive; + } + + function blueContext() : BlueContext { + return $this->blueContext; + } + + + function cycle() : Cycle { + return $this->cycle; + } + + function expand(string $source, string $filename = '', int $gc = self::GC_ENGINE_ENABLED) : string { + + foreach ($this->globalDirectives as $d) $this->registerDirective($d); + + $ts = TokenStream::{$filename ? 'fromSource' : 'FromSourceWithoutOpenTag'}($source); + + $this->parser->parse($ts); + $expansion = (string) $ts; + + if (self::GC_ENGINE_ENABLED === $gc) { + // almost everything is local per file so state must be destroyed after expansion + // unless the flag ::GC_ENGINE_DISABLED forces a recycle during nested expansions + // global directives are allocated again later to give impression of persistence + $this->cycle = new Cycle; + $this->literalHitMap= $this->typeHitMap = []; + $this->blueContext = new BlueContext; + } + + return $expansion; + } +} diff --git a/src/Expansion.php b/src/Expansion.php index 5b31f0f..afd9d80 100644 --- a/src/Expansion.php +++ b/src/Expansion.php @@ -43,15 +43,15 @@ function isEmpty() : bool { return $this->expansion->isEmpty(); } - function expand(Ast $crossover, Cycle $cycle, Directives $directives, BlueContext $blueContext) : TokenStream { + function expand(Ast $crossover, Engine $engine) : TokenStream { $expansion = clone $this->expansion; if ($this->unsafe) - hygienize($expansion, Map::fromKeysAndValues(['scope' => 
$cycle->id(),])); + hygienize($expansion, $engine); if ($this->constant) return $expansion; - return $this->mutate($expansion, $crossover, $cycle, $directives, $blueContext); + return $this->mutate($expansion, $crossover, $engine); } private function compile(array $expansion, Map $context) : TokenStream { @@ -156,7 +156,7 @@ private function compile(array $expansion, Map $context) : TokenStream { return $cg->ts; } - private function mutate(TokenStream $ts, Ast $context, Cycle $cycle, Directives $directives, BlueContext $blueContext) : TokenStream { + private function mutate(TokenStream $ts, Ast $context, Engine $engine) : TokenStream { static $states, $parser; @@ -200,15 +200,10 @@ private function mutate(TokenStream $ts, Ast $context, Cycle $cycle, Directives $expander = $result->expander; if (\count($result->args) === 0) $cg->this->fail(self::E_EMPTY_EXPANDER_SLICE, (string) $expander, $expander->line()); - - $context = Map::fromKeysAndValues([ - 'scope' => $cg->cycle->id(), - 'directives' => $cg->directives, - 'blueContext' => $cg->blueContext - ]); $expansion = TokenStream::fromSlice($result->args); - $mutation = $cg->this->mutate($expansion, $cg->context, $cg->cycle, $cg->directives, $cg->blueContext); - $mutation = $cg->this->lookupExpander($expander)($mutation, $context); + $mutation = $cg->this->mutate($expansion, $cg->context, $cg->engine); + + $mutation = $cg->this->lookupExpander($expander)($mutation, $cg->engine); $cg->ts->inject($mutation); }) , @@ -250,9 +245,7 @@ private function mutate(TokenStream $ts, Ast $context, Cycle $cycle, Directives $mutation = $cg->this->mutate( $expansion, (new Ast(null, $subContext))->withParent($cg->context), - $cg->cycle, - $cg->directives, - $cg->blueContext + $cg->engine ); if ($i !== 0) foreach ($delimiters as $d) $mutation->push($d); $cg->ts->inject($mutation); @@ -288,10 +281,8 @@ function(Token $token) use(&$tokens) { $cg = (object) [ 'ts' => $ts, 'context' => $context, - 'directives' => $directives, - 'cycle' => 
$cycle, + 'engine' => $engine, 'this' => $this, - 'blueContext' => $blueContext ]; $states->push($cg); diff --git a/src/Macro.php b/src/Macro.php index c90a5c2..dad26f6 100644 --- a/src/Macro.php +++ b/src/Macro.php @@ -7,7 +7,6 @@ class Macro implements Directive { protected $pattern, $expansion, - $cycle, $tags, $terminal = true, $hasExpansion = true @@ -19,12 +18,11 @@ class Macro implements Directive { protected static $_id = 0; - function __construct(Map $tags, Pattern $pattern, Expansion $expansion, Cycle $cycle) { + function __construct(Map $tags, Pattern $pattern, Expansion $expansion) { $this->id = (__CLASS__)::$_id++; $this->tags = $tags; $this->pattern = $pattern; $this->expansion = $expansion; - $this->cycle = $cycle; $this->terminal = !$this->expansion->isRecursive(); $this->hasExpansion = !$this->expansion->isEmpty(); @@ -46,7 +44,8 @@ function expansion() : Expansion { return $this->expansion; } - function apply(TokenStream $ts, Directives $directives, BlueContext $blueContext) { + function apply(TokenStream $ts, Engine $engine) { + $from = $ts->index(); $crossover = $this->pattern->match($ts); @@ -55,6 +54,7 @@ function apply(TokenStream $ts, Directives $directives, BlueContext $blueContext if ($this->hasExpansion) { + $blueContext = $engine->blueContext(); $blueMacros = $this->getAllBlueMacrosFromCrossover($crossover->all(), $blueContext); if ($this->terminal && isset($blueMacros[$this->id])) { // already expanded @@ -67,7 +67,7 @@ function apply(TokenStream $ts, Directives $directives, BlueContext $blueContext $to = $ts->index(); $ts->extract($from, $to); - $expansion = $this->expansion->expand($crossover, $this->cycle, $directives, $blueContext); + $expansion = $this->expansion->expand($crossover, $engine); $blueMacros[$this->id] = true; @@ -89,18 +89,18 @@ function apply(TokenStream $ts, Directives $directives, BlueContext $blueContext $ts->extract($from, $to); } - $this->cycle->next(); + $engine->cycle()->next(); } - private function 
getAllBlueMacrosFromCrossover($nodes, BlueContext $blueContext): array { - $macros = []; - - foreach ($nodes as $node) - if ($node instanceof Token) - $macros += $blueContext->getDisabledMacros($node); - else - $macros += $this->getAllBlueMacrosFromCrossover($node, $blueContext); + private function getAllBlueMacrosFromCrossover($node, BlueContext $blueContext): array { + if ($node instanceof Token) + return $blueContext->getDisabledMacros($node); + else if(is_array($node)) { + $macros = []; + foreach ($node as $n) + $macros += $this->getAllBlueMacrosFromCrossover($n, $blueContext); - return $macros; + return $macros; + } } } diff --git a/src/StreamWrapper.php b/src/StreamWrapper.php index 43f344c..1d32fc3 100644 --- a/src/StreamWrapper.php +++ b/src/StreamWrapper.php @@ -17,14 +17,15 @@ final class StreamWrapper { ; protected static - $registered = false + $registered = false, + $engine ; protected $resource ; - static function register() + static function register(Engine $engine) { if (true === self::$registered) return; @@ -34,6 +35,7 @@ static function register() self::SCHEME . ' protocol.' ); + self::$engine = $engine; self::$registered = true; } @@ -55,6 +57,7 @@ static function unregister() ' protocol.' 
); + self::$engine = null; self::$registered = false; } @@ -71,8 +74,7 @@ function stream_open(string $path, string $mode, int $flags, &$opened_path) : bo $opened_path = $path; - $source = yay_parse(file_get_contents($fileMeta->getRealPath())); - // var_dump($source); + $source = self::$engine->expand(file_get_contents($fileMeta->getRealPath()), $fileMeta->getRealPath()); $this->resource = fopen('php://memory', 'rb+'); fwrite($this->resource, $source); diff --git a/src/expanders.php b/src/expanders.php index 9a917e6..70a4c45 100644 --- a/src/expanders.php +++ b/src/expanders.php @@ -2,7 +2,7 @@ namespace Yay\DSL\Expanders; -use Yay\{Token, TokenStream, Ast, YayException, Cycle, Parser, Context}; +use Yay\{Engine, Token, TokenStream, Ast, YayException, Cycle, Parser, Context}; use function Yay\{ token, rtoken, identifier, chain, either, any, parentheses, braces, traverse, midrule }; @@ -47,12 +47,12 @@ function concat(TokenStream $ts) : TokenStream { return TokenStream::fromSequence(new Token(T_STRING, $buffer)); } -function hygienize(TokenStream $ts, Context $context) : TokenStream { +function hygienize(TokenStream $ts, Engine $engine) : TokenStream { $ts->reset(); $cg = (object)[ 'node' => null, - 'scope' => $context->get('scope'), + 'scope' => $engine->cycle()->id(), 'ts' => $ts ]; @@ -93,9 +93,11 @@ function hygienize(TokenStream $ts, Context $context) : TokenStream { function unsafe(TokenStream $ts) : TokenStream { return $ts; } -function expand(TokenStream $ts, Context $context) : TokenStream { - $ts = TokenStream::fromSource(yay_parse('get('directives'), $context->get('blueContext'))); - $ts->shift(); +function expand(TokenStream $ts, Engine $engine) : TokenStream { + + // var_dump($engine); + + $ts = TokenStream::fromSource($engine->expand((string) $ts, '', Engine::GC_ENGINE_DISABLED)); return $ts; } diff --git a/tests/MacroScopeTest.php b/tests/MacroScopeTest.php index 5737c3e..045ee1b 100644 --- a/tests/MacroScopeTest.php +++ b/tests/MacroScopeTest.php @@ 
-12,7 +12,9 @@ class MacroScopeTest extends \PHPUnit_Framework_TestCase { ABSOLUTE_FIXTURES_DIR = __DIR__ . '/' . self::FIXTURES_DIR ; - function setUp() { StreamWrapper::register(); } + function setUp() { + StreamWrapper::register(new Engine); + } function testGlobalMacro() { include 'yay://' . self::ABSOLUTE_FIXTURES_DIR . '/macros.php'; diff --git a/tests/SpecsTest.php b/tests/SpecsTest.php index fa3b586..50bac5c 100644 --- a/tests/SpecsTest.php +++ b/tests/SpecsTest.php @@ -10,6 +10,8 @@ PHPUnit_Framework_Assert as Assert ; +use PhpParser\{ ParserFactory, PrettyPrinter }; + /** * @group large */ @@ -71,6 +73,7 @@ class Test { protected + $engine, $status, $name, $source, @@ -87,7 +90,8 @@ function __construct(string $file) { $this->file_expect = preg_replace('/\.phpt$/', '.exp', $this->file); $this->file_diff = preg_replace('/\.phpt$/', '.diff', $this->file); $this->file_out = preg_replace('/\.phpt$/', '.out', $this->file); - // $this->file_php = preg_replace('/\.phpt$/', '.php', $this->file); + + $this->engine = new Engine; } function run() { @@ -101,9 +105,14 @@ function run() { list($this->name, $this->source, $this->expected) = $sections; try { - $this->out = yay_parse($this->source); - if (false !== strpos($this->name, '--pretty-print')) - $this->out = yay_pretty($this->out) . PHP_EOL . PHP_EOL . '?>'; + $this->out = $this->engine->expand($this->source, $this->file); + + if (false !== strpos($this->name, '--pretty-print')) { + $parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7); + $prettyPrinter = new PrettyPrinter\Standard; + $stmts = $parser->parse($this->out); + $this->out = $prettyPrinter->prettyPrintFile($stmts) . PHP_EOL . PHP_EOL . 
'?>'; + } } catch(YayParseError $e) { $this->out = $e->getMessage(); // $this->out = (string) $e; diff --git a/tests/StreamWrapperTest.php b/tests/StreamWrapperTest.php index 04b2924..15a0f98 100644 --- a/tests/StreamWrapperTest.php +++ b/tests/StreamWrapperTest.php @@ -12,7 +12,9 @@ class StreamWrapperTest extends \PHPUnit_Framework_TestCase { ABSOLUTE_FIXTURES_DIR = __DIR__ . '/' . self::FIXTURES_DIR ; - function setUp() { StreamWrapper::register(); } + function setUp() { + StreamWrapper::register(new Engine); + } function syntaxErrorProvider() { diff --git a/yay_parse.php b/yay_parse.php deleted file mode 100644 index 38276d2..0000000 --- a/yay_parse.php +++ /dev/null @@ -1,105 +0,0 @@ -add($d); - - $cg = (object) [ - 'ts' => TokenStream::fromSource($source), - 'directives' => $directives, - 'cycle' => new Cycle($source), - 'globalDirectives' => $globalDirectives, - 'blueContext' => $blueContext, - ]; - - traverse - ( - // this midrule is where the preprocessor really does the job! - midrule(function(TokenStream $ts) use ($cg) { - $cg->directives->apply($ts, $cg->blueContext); - }) - , - // here we parse, compile and allocate new macros - consume - ( - chain - ( - token(T_STRING, 'macro')->as('declaration') - , - optional - ( - repeat - ( - rtoken('/^·\w+$/') - ) - ) - ->as('tags') - , - lookahead - ( - token('{') - ) - , - commit - ( - chain - ( - braces()->as('pattern') - , - operator('>>') - , - braces()->as('expansion') - ) - ) - ->as('body') - , - optional - ( - token(';') - ) - ) - , - CONSUME_DO_TRIM - ) - ->onCommit(function(Ast $macroAst) use ($cg) { - $scope = Map::fromEmpty(); - $tags = Map::fromValues(array_map('strval', $macroAst->{'tags'})); - $pattern = new Pattern($macroAst->{'declaration'}->line(), $macroAst->{'body pattern'}, $tags, $scope); - $expansion = new Expansion($macroAst->{'body expansion'}, $tags, $scope); - - $macro = new Macro($tags, $pattern, $expansion, $cg->cycle); - - $cg->directives->add($macro); // allocate the userland macro 
- - // allocate the userland macro globally if it's declared as global - if ($macro->tags()->contains('·global')) $cg->globalDirectives[] = $macro; - }) - ) - ->parse($cg->ts); - - $expansion = (string) $cg->ts; - - if ($gc) gc_enable(); - - return $expansion; -} diff --git a/yay_pretty.php b/yay_pretty.php deleted file mode 100644 index 8b20394..0000000 --- a/yay_pretty.php +++ /dev/null @@ -1,11 +0,0 @@ -create(ParserFactory::PREFER_PHP7); - $prettyPrinter = new PrettyPrinter\Standard; - $stmts = $parser->parse($source); - - return $prettyPrinter->prettyPrintFile($stmts); -} From 52c53d0b20fd1efdf7bcf425fac545c2446d2e3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Sat, 25 Feb 2017 07:56:12 -0400 Subject: [PATCH 12/16] remove special case for macros with empty expansion. This means that the engine won't try to elide the excessive empty space left behind when a macro expands to nothing. This relates to code formatting and is not YAY responsibility. Why did I edge case in the first place? 
:wonders: A pretty printer should be used after the expansion from now on, if beauty is desired :) --- src/Macro.php | 24 ++++++----------------- tests/phpt/macro/empty_expansion_001.phpt | 2 +- tests/phpt/macro/empty_expansion_002.phpt | 6 ++++-- tests/phpt/macro/empty_expansion_003.phpt | 6 ++++++ 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/src/Macro.php b/src/Macro.php index dad26f6..59f760b 100644 --- a/src/Macro.php +++ b/src/Macro.php @@ -8,8 +8,7 @@ class Macro implements Directive { $pattern, $expansion, $tags, - $terminal = true, - $hasExpansion = true + $isTerminal ; private @@ -24,8 +23,7 @@ function __construct(Map $tags, Pattern $pattern, Expansion $expansion) { $this->pattern = $pattern; $this->expansion = $expansion; - $this->terminal = !$this->expansion->isRecursive(); - $this->hasExpansion = !$this->expansion->isEmpty(); + $this->isTerminal = !$this->expansion->isRecursive(); } function id() : int { @@ -50,14 +48,12 @@ function apply(TokenStream $ts, Engine $engine) { $crossover = $this->pattern->match($ts); - if (null === $crossover || $crossover instanceof Error) return; - - if ($this->hasExpansion) { + if ($crossover instanceof Ast ) { $blueContext = $engine->blueContext(); $blueMacros = $this->getAllBlueMacrosFromCrossover($crossover->all(), $blueContext); - if ($this->terminal && isset($blueMacros[$this->id])) { // already expanded + if ($this->isTerminal && isset($blueMacros[$this->id])) { // already expanded $ts->jump($from); return; @@ -79,17 +75,9 @@ function apply(TokenStream $ts, Engine $engine) { } $ts->inject($expansion); - } - else { - $ts->unskip(); - while (null !== ($token = $ts->current()) && $token->is(T_WHITESPACE)) { - $ts->step(); - } - $to = $ts->index(); - $ts->extract($from, $to); - } - $engine->cycle()->next(); + $engine->cycle()->next(); + } } private function getAllBlueMacrosFromCrossover($node, BlueContext $blueContext): array { diff --git a/tests/phpt/macro/empty_expansion_001.phpt 
b/tests/phpt/macro/empty_expansion_001.phpt index 7d25760..553b919 100644 --- a/tests/phpt/macro/empty_expansion_001.phpt +++ b/tests/phpt/macro/empty_expansion_001.phpt @@ -27,7 +27,7 @@ HTML here, this should be preserved: @ "debug" public test(); class X { - function test(){} + function test(){} } ?> diff --git a/tests/phpt/macro/empty_expansion_002.phpt b/tests/phpt/macro/empty_expansion_002.phpt index 466153c..b81836f 100644 --- a/tests/phpt/macro/empty_expansion_002.phpt +++ b/tests/phpt/macro/empty_expansion_002.phpt @@ -25,9 +25,11 @@ DEBUG(); $foo->bar; -// match + // match + + // match + -// match DEBUG(); diff --git a/tests/phpt/macro/empty_expansion_003.phpt b/tests/phpt/macro/empty_expansion_003.phpt index 38cde28..60f5ae0 100644 --- a/tests/phpt/macro/empty_expansion_003.phpt +++ b/tests/phpt/macro/empty_expansion_003.phpt @@ -17,4 +17,10 @@ macro { @ T_STRING·label ; } >> { }; --EXPECTF-- From b59fd75edda5b20ee7684d655d12787f01dff7b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Sun, 26 Feb 2017 07:18:24 -0400 Subject: [PATCH 13/16] transpose line numbers of expansion tokens This ensures correctness of line numbers of syntax errors when they happen with an expanded token. 
--- src/Expansion.php | 3 ++- src/Macro.php | 2 +- src/expanders.php | 5 ++-- tests/phpt/errors/error_line_number.phpt | 32 ++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 tests/phpt/errors/error_line_number.phpt diff --git a/src/Expansion.php b/src/Expansion.php index afd9d80..d07cc9a 100644 --- a/src/Expansion.php +++ b/src/Expansion.php @@ -98,7 +98,8 @@ private function compile(array $expansion, Map $context) : TokenStream { TokenStream::fromSequence( new Token( Token::CLOAKED, - implode('', $result->cloaked) + implode('', $result->cloaked), + $result->cloaked[0]->line() ) ) ); diff --git a/src/Macro.php b/src/Macro.php index 59f760b..dfc6a33 100644 --- a/src/Macro.php +++ b/src/Macro.php @@ -67,9 +67,9 @@ function apply(TokenStream $ts, Engine $engine) { $blueMacros[$this->id] = true; - // paint blue context with tokens from expansion and disabled macros $node = $expansion->index(); while ($node instanceof Node) { + // paint blue context with tokens from expansion and disabled macros $blueContext->addDisabledMacros($node->token, $blueMacros); $node = $node->next; } diff --git a/src/expanders.php b/src/expanders.php index 70a4c45..3d90e00 100644 --- a/src/expanders.php +++ b/src/expanders.php @@ -13,7 +13,7 @@ function stringify(TokenStream $ts) : TokenStream { return TokenStream::fromSequence( new Token( - T_CONSTANT_ENCAPSED_STRING, "'{$str}'" + T_CONSTANT_ENCAPSED_STRING, "'{$str}'", $ts->first()->line() ) ) ; @@ -34,6 +34,7 @@ function unvar(TokenStream $ts) : TokenStream { function concat(TokenStream $ts) : TokenStream { $ts->reset(); $buffer = ''; + $line = $ts->current()->line(); while($t = $ts->current()) { $str = (string) $t; if (! 
preg_match('/^\w+$/', $str)) @@ -44,7 +45,7 @@ function concat(TokenStream $ts) : TokenStream { $ts->next(); } - return TokenStream::fromSequence(new Token(T_STRING, $buffer)); + return TokenStream::fromSequence(new Token(T_STRING, $buffer, $line)); } function hygienize(TokenStream $ts, Engine $engine) : TokenStream { diff --git a/tests/phpt/errors/error_line_number.phpt b/tests/phpt/errors/error_line_number.phpt new file mode 100644 index 0000000..633fbd8 --- /dev/null +++ b/tests/phpt/errors/error_line_number.phpt @@ -0,0 +1,32 @@ +--TEST-- +Ensures preprocessor syntax errors occurs in the right line number +--FILE-- +> { + + function expansion() + { + ·captured ·captured; + } + +} + +macro { + + ·token(T_STRING, 'foo') · expected + +} >> { + + END; +} + +foo; // L:25 + +?> +--EXPECTF-- +Unexpected T_STRING(foo) on line 25, expected T_STRING(expected). From 93957f06bddcb1ecdb19e014305d932a3d4eaa13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Mon, 27 Feb 2017 19:36:14 -0400 Subject: [PATCH 14/16] typesafe Ast access refactory --- src/Ast.php | 92 ++++++++++++++++++++++++++++++++++---------- src/Expansion.php | 25 ++++++------ src/Macro.php | 2 +- src/Pattern.php | 32 +++++++-------- src/parsers.php | 2 +- tests/AstTest.php | 82 +++++++++++++++++++++++++++++++++++++++ tests/ParserTest.php | 2 +- 7 files changed, 185 insertions(+), 52 deletions(-) create mode 100644 tests/AstTest.php diff --git a/src/Ast.php b/src/Ast.php index 5d81616..2795a04 100644 --- a/src/Ast.php +++ b/src/Ast.php @@ -3,7 +3,9 @@ namespace Yay; use - InvalidArgumentException + InvalidArgumentException, + ArrayIterator, + TypeError ; /** @@ -22,7 +24,7 @@ class Ast implements Result, Context { function __construct(string $label = null, $ast = []) { if ($ast instanceof self) - throw new InvalidArgumentException('Unmerged AST.'); + throw new TypeError('Unmerged AST.'); $this->ast = $ast; $this->label = $label; @@ -32,33 +34,77 @@ function __get($path) { return 
$this->get($path); } - function get($path) { - $ret = - $this->getIn( - (null !== $this->label ? $this->all() : $this->ast), - preg_split('/\s+/', $path) - ) - ; + function get($strPath) { + $ret = null; + $path = preg_split('/\s+/', $strPath); - if (null === $ret && $this->parent) $ret = $this->parent->get($path); + if ($wrap = ('*' === $path[0])) { + array_shift($path); + } + + try { + $ret = $this->getIn($this->ast, $path); + + if (null === $ret && $this->parent) $ret = $this->parent->get($strPath); + + if ($wrap) { + $label = end($path) ?: null; + $ret = new self($label, $ret); + } + } + catch(TypeError $e) { + if ($wrap) { + throw new \Yay\YayException("Could not access (Ast)->{'" . implode(' ', $path) . "'}."); + } + } return $ret; } - function raw() { + function unwrap() { return $this->ast; } - function token() : Token { - return $this->ast; + function token() { + if ($this->ast instanceof Token) return $this->ast; + + $this->failCasting(Token::class); } - function array() : array { - return $this->ast; + function null() { + if (\is_null($this->ast)) return $this->ast; + + $this->failCasting('null'); } - function all() { - return [($this->label ?? 0) => $this->ast]; + function bool() { + if (\is_bool($this->ast)) return $this->ast; + + $this->failCasting('boolean'); + } + + function string() { + if (\is_string($this->ast)) return $this->ast; + + $this->failCasting('string'); + } + + + function array() { + if (\is_array($this->ast)) return $this->ast; + + $this->failCasting('array'); + } + + function list() { + $array = $this->array(); + + reset($array); + + $isAssociative = \count(array_filter(array_keys($array), 'is_string')) > 0; + + foreach ($array as $label => $value) + yield new Ast(($isAssociative ? $label : null), $value); } function append(self $ast) : self { @@ -81,7 +127,7 @@ function push(self $ast) : self { } function isEmpty() : bool { - return ! 
count($this->ast); + return !\count($this->ast) || null === $this->ast; } function as(/*string|null*/ $label = null) : self { @@ -97,7 +143,7 @@ function withParent(self $parent) : self { } function symbols() : array { - return array_keys($this->all()[0]); + return \is_array($this->ast) ? \array_keys($this->ast) : []; } /** @@ -110,13 +156,13 @@ private function getIn(array $array, array $keys, $default = null) } // This is a micro-optimization, it is fast for non-nested keys, but fails for null values - if (count($keys) === 1 && isset($array[$keys[0]])) { + if (\count($keys) === 1 && isset($array[$keys[0]])) { return $array[$keys[0]]; } $current = $array; foreach ($keys as $key) { - if (!is_array($current) || !array_key_exists($key, $current)) { + if (!\is_array($current) || !\array_key_exists($key, $current)) { return $default; } @@ -125,4 +171,8 @@ private function getIn(array $array, array $keys, $default = null) return $current; } + + private function failCasting(string $type) { + throw new YayException(sprintf("Ast cannot be casted to '%s'", $type)); + } } diff --git a/src/Expansion.php b/src/Expansion.php index d07cc9a..77b0f24 100644 --- a/src/Expansion.php +++ b/src/Expansion.php @@ -169,11 +169,11 @@ private function mutate(TokenStream $ts, Ast $context, Engine $engine) : TokenSt ( token(Token::CLOAKED) , - rtoken('/^T_\w+·\w+$/')->as('label')->onCommit(function(Ast $result) use ($states) { + rtoken('/^T_\w+·\w+$/')->onCommit(function(Ast $result) use ($states) { $cg = $states->current(); $ts = $cg->ts; - $token = $cg->this->lookupContext($result->label, $cg->context, self::E_UNDEFINED_EXPANSION); + $token = $cg->this->lookupContext($result->token(), $cg->context, self::E_UNDEFINED_EXPANSION); $ts->previous(); $node = $ts->index(); @@ -198,10 +198,11 @@ private function mutate(TokenStream $ts, Ast $context, Engine $engine) : TokenSt ->onCommit(function(Ast $result) use ($states) { $cg = $states->current(); - $expander = $result->expander; - if 
(\count($result->args) === 0) + $expander = $result->{'expander'}; + if (\count($result->{'args'}) === 0) $cg->this->fail(self::E_EMPTY_EXPANDER_SLICE, (string) $expander, $expander->line()); - $expansion = TokenStream::fromSlice($result->args); + + $expansion = TokenStream::fromSlice($result->{'args'}); $mutation = $cg->this->mutate($expansion, $cg->context, $cg->engine); $mutation = $cg->this->lookupExpander($expander)($mutation, $cg->engine); @@ -230,19 +231,19 @@ private function mutate(TokenStream $ts, Ast $context, Engine $engine) : TokenSt $cg = $states->current(); if (null !== $result->optional) - $context = $cg->this->lookupContextOptional($result->label, $cg->context); + $context = $cg->this->lookupContextOptional($result->{'label'}, $cg->context); else - $context = $cg->this->lookupContext($result->label, $cg->context, self::E_UNDEFINED_EXPANSION); + $context = $cg->this->lookupContext($result->{'label'}, $cg->context, self::E_UNDEFINED_EXPANSION); if ($context === null) return; - $delimiters = $result->delimiters; + $delimiters = $result->{'delimiters'}; // normalize single context if (array_values($context) !== $context) $context = [$context]; foreach (array_reverse($context) as $i => $subContext) { - $expansion = TokenStream::fromSlice($result->expansion); + $expansion = TokenStream::fromSlice($result->{'expansion'}); $mutation = $cg->this->mutate( $expansion, (new Ast(null, $subContext))->withParent($cg->context), @@ -255,12 +256,12 @@ private function mutate(TokenStream $ts, Ast $context, Engine $engine) : TokenSt , consume ( - rtoken('/^·\w+|···\w+$/')->as('label') + rtoken('/^·\w+|···\w+$/') ) ->onCommit(function(Ast $result) use ($states) { $cg = $states->current(); - $context = $cg->this->lookupContext($result->label, $cg->context, self::E_UNDEFINED_EXPANSION); + $context = $cg->this->lookupContext($result->token(), $cg->context, self::E_UNDEFINED_EXPANSION); if ($context instanceof Token) { $cg->ts->inject(TokenStream::fromSequence($context)); 
@@ -281,7 +282,7 @@ function(Token $token) use(&$tokens) { $cg = (object) [ 'ts' => $ts, - 'context' => $context, + 'context' => $context->label() ? new Ast(null, [$context->label() => $context->unwrap()]) : $context, 'engine' => $engine, 'this' => $this, ]; diff --git a/src/Macro.php b/src/Macro.php index dfc6a33..8c92ebe 100644 --- a/src/Macro.php +++ b/src/Macro.php @@ -51,7 +51,7 @@ function apply(TokenStream $ts, Engine $engine) { if ($crossover instanceof Ast ) { $blueContext = $engine->blueContext(); - $blueMacros = $this->getAllBlueMacrosFromCrossover($crossover->all(), $blueContext); + $blueMacros = $this->getAllBlueMacrosFromCrossover($crossover->unwrap(), $blueContext); if ($this->isTerminal && isset($blueMacros[$this->id])) { // already expanded $ts->jump($from); diff --git a/src/Pattern.php b/src/Pattern.php index 20ce978..4e115a5 100644 --- a/src/Pattern.php +++ b/src/Pattern.php @@ -115,8 +115,7 @@ private function compile(array $tokens) { ) ) ->onCommit(function(Ast $result) use($cg) { - $cg->parsers[] = $this->compileParser( - $result->type, $result->args, $result->label); + $cg->parsers[] = $this->compileParser($result); }) , // handles {···layer} @@ -239,40 +238,41 @@ private function lookupParser(Token $token) : string { return $parser; } - private function compileParser(Token $type, array $args, Token $label = null) : Parser - { - $parser = $this->lookupParser($type); - $args = $this->compileParserArgs($args); + protected function compileParser(Ast $ast) : Parser { + $parser = $this->lookupParser($ast->{'* type'}->token()); + $args = $this->compileParserArgs($ast->{'* args'}); $parser = $parser(...$args); - if ($label) + if ($label = $ast->{'label'}) $parser->as($this->lookupCapture($label)); return $parser; } - private function compileParserArgs(array $args) : array { + protected function compileParserArgs(Ast $args) : array { $compiled = []; - foreach ($args as $label => $arg) switch ((string) $label) { + + foreach ($args->list() as $arg) 
switch ((string) $arg->label()) { case 'this': $compiled[] = future($this->pattern); break; case 'token': - $type = $this->lookupTokenType($arg); - $label = $this->lookupCapture($arg); + $token = $arg->token(); + $type = $this->lookupTokenType($token); + $label = $this->lookupCapture($token); $compiled[] = token($type)->as($label); break; case 'label': - $compiled[] = token($arg); + case 'literal': + $compiled[] = token($arg->token()); break; case 'parser': - $compiled[] = $this->compileParser( - $arg['type'], $arg['args'], $arg['label']); + $compiled[] = $this->compileParser($arg); break; case 'string': - $compiled[] = trim((string) $arg, '"\''); + $compiled[] = trim((string) $arg->token(), '"\''); break; case 'constant': // T_* - $compiled[] = $this->lookupTokenType($arg); + $compiled[] = $this->lookupTokenType($arg->token()); break; case 'function': // function(...){...} $compiled[] = $this->compileAnonymousFunctionArg($arg); diff --git a/src/parsers.php b/src/parsers.php index 93311cd..6b44510 100644 --- a/src/parsers.php +++ b/src/parsers.php @@ -610,7 +610,7 @@ function optional(Parser $parser, $default = []) : Parser protected function parser(TokenStream $ts, Parser $parser, $default) : Ast { $result = $parser->parse($ts); - $match = ($result instanceof Ast) ? $result->raw() : $default; + $match = ($result instanceof Ast) ? 
$result->unwrap() : $default; return (new Ast($parser->label, $match))->as($this->label); } diff --git a/tests/AstTest.php b/tests/AstTest.php new file mode 100644 index 0000000..47bc29b --- /dev/null +++ b/tests/AstTest.php @@ -0,0 +1,82 @@ + ['baz' => true, 'buz' => false]]); + + $this->assertSame('foo', $ast->label()); + $this->assertSame(['bar'], $ast->symbols()); + + $childAst = $ast->{'* foo bar'}; + $this->assertInstanceOf(Ast::class, $childAst); + $this->assertSame('bar', $childAst->label()); + $this->assertSame([], $childAst->symbols()); + $this->assertSame(null, $childAst->unwrap()); + $this->assertNull($childAst->{'baz'}); + $this->assertNull($childAst->{'buz'}); + $this->assertNull($childAst->{'undefined'}); + + $childAst = $ast->{'* bar'}; + $this->assertInstanceOf(Ast::class, $childAst); + $this->assertSame('bar', $childAst->label()); + $this->assertSame(['baz', 'buz'], $childAst->symbols()); + $this->assertSame(['baz' => true, 'buz' => false], $childAst->unwrap()); + $this->assertTrue($childAst->{'baz'}); + $this->assertFalse($childAst->{'buz'}); + $this->assertNull($childAst->{'undefined'}); + } + + function providerForTestMapAstCastOnFailure() { + return [ + ['* defined', 'null', 'null'], + ['* undefined', 'bool', 'boolean'], + ['* undefined', 'array', 'array'], + ['* undefined', 'token', preg_quote(Token::class)], + ]; + } + + /** + * @dataProvider providerForTestMapAstCastOnFailure + */ + function testMapAstCastOnFailure(string $path, string $castMethod, string $typeName) { + $this->setExpectedExceptionRegExp(YayException::class, "/^Ast cannot be casted to '{$typeName}'$/"); + $ast = new Ast(null, ['defined' => true]); + var_dump($ast->{$path}->$castMethod()); + } + + function providerForTestAstCast() { + return [ + ['* some null', 'null', null], + ['* some boolean', 'bool', true], + ['* some string', 'string', 'foo'], + ['* some array', 'array', ['foo', 'bar']], + ['* some token', 'token', new Token(';')], + ]; + } + + /** + * @dataProvider 
providerForTestAstCast + */ + function testAstCast(string $path, string $castMethod, $expected) { + $ast = new Ast(null, [ + 'some' => [ + 'null' => null, + 'boolean' => true, + 'string' => 'foo', + 'array' => ['foo', 'bar'], + 'token' => new Token(';') + ] + ]); + + if ($expected instanceof Token) + $this->assertEquals((string) $expected, (string) $ast->{$path}->$castMethod()); + else + $this->assertEquals($expected, $ast->{$path}->$castMethod()); + } +} diff --git a/tests/ParserTest.php b/tests/ParserTest.php index 8e9a6e5..48cf990 100644 --- a/tests/ParserTest.php +++ b/tests/ParserTest.php @@ -59,7 +59,7 @@ function($ast) use ($expected, &$commited){ $this->assertTrue($commited, "Missing commit on {$parser}()."); $buffer = []; - $astArray = $ast->all(); + $astArray = [$ast->unwrap()]; array_walk_recursive($astArray, function(Token $token) use (&$buffer){ $buffer[] = $token->dump(); }); From fda1ac3b9725d39a35f76ec8cd8c8960e1645f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Tue, 21 Feb 2017 23:06:32 -0400 Subject: [PATCH 15/16] add PEG macro flavor --- src/Ast.php | 8 +- src/Engine.php | 9 +- src/Expansion.php | 8 - src/GrammarPattern.php | 393 ++++++++++++++++++++++++++++++ src/Macro.php | 2 +- src/MacroMember.php | 8 + src/Map.php | 10 +- src/Pattern.php | 15 +- src/PatternInterface.php | 12 + src/parsers.php | 65 ++++- tests/ParserTest.php | 20 +- tests/phpt/group_use_grammar.phpt | 44 ++++ tests/phpt/json.phpt | 55 +++++ 13 files changed, 612 insertions(+), 37 deletions(-) create mode 100644 src/GrammarPattern.php create mode 100644 src/PatternInterface.php create mode 100644 tests/phpt/group_use_grammar.phpt create mode 100644 tests/phpt/json.phpt diff --git a/src/Ast.php b/src/Ast.php index 2795a04..de40bab 100644 --- a/src/Ast.php +++ b/src/Ast.php @@ -130,12 +130,16 @@ function isEmpty() : bool { return !\count($this->ast) || null === $this->ast; } - function as(/*string|null*/ $label = null) : self { - if (null !== $label && 
null === $this->label) $this->label = $label; + function as(string $label = null) : self { + if (null !== $label) $this->label = $label; return $this; } + function label() { + return $this->label; + } + function withParent(self $parent) : self { $this->parent = $parent; diff --git a/src/Engine.php b/src/Engine.php index 96773fa..ecb0217 100644 --- a/src/Engine.php +++ b/src/Engine.php @@ -105,7 +105,14 @@ function __construct() { ->onCommit(function(Ast $macroAst) { $scope = Map::fromEmpty(); $tags = Map::fromValues(array_map('strval', $macroAst->{'tags'})); - $pattern = new Pattern($macroAst->{'declaration'}->line(), $macroAst->{'body pattern'}, $tags, $scope); + + if ($tags->contains('·grammar')) { + $pattern = new GrammarPattern($macroAst->{'declaration'}->line(), $macroAst->{'body pattern'}, $tags, $scope); + } + else { + $pattern = new Pattern($macroAst->{'declaration'}->line(), $macroAst->{'body pattern'}, $tags, $scope); + } + $expansion = new Expansion($macroAst->{'body expansion'}, $tags, $scope); $macro = new Macro($tags, $pattern, $expansion); diff --git a/src/Expansion.php b/src/Expansion.php index 77b0f24..5fe454b 100644 --- a/src/Expansion.php +++ b/src/Expansion.php @@ -6,14 +6,6 @@ class Expansion extends MacroMember { - const - PRETTY_PRINT = - JSON_PRETTY_PRINT - | JSON_BIGINT_AS_STRING - | JSON_UNESCAPED_UNICODE - | JSON_UNESCAPED_SLASHES - ; - const E_BAD_EXPANSION = "Bad macro expansion identifier '%s' on line %d.", E_BAD_EXPANDER = "Bad macro expander '%s' on line %d.", diff --git a/src/GrammarPattern.php b/src/GrammarPattern.php new file mode 100644 index 0000000..e194d4c --- /dev/null +++ b/src/GrammarPattern.php @@ -0,0 +1,393 @@ +fail(self::E_EMPTY_PATTERN, $line); + + $this->scope = $scope; + $this->unreached = new Map; + $this->staged = new Map; + $this->collected = new Map; + $this->pattern = $this->compile($line, $pattern); + } + + private function compile(int $line, array $tokens) { + + $label = rtoken('/^·\w+$/')->as('label'); + + 
$doubleQuotes = token(T_CONSTANT_ENCAPSED_STRING, "''"); + + $commit = chain(token('!'), token('!'))->as('commit'); + + $literal = between($doubleQuotes, any(), $doubleQuotes)->as('literal'); + + $constant = rtoken('/^T_\w+$/')->as('constant'); + + $optionalModifier = optional(token('?'), false)->as('optional?'); + + $productionModifier = optional(token(T_SL), false)->as('production?'); + + $parser = ( + chain + ( + rtoken('/^·\w+$/')->as('type') + , + token('(') + , + optional + ( + ls + ( + either + ( + future + ( + $parser // recursion !!! + ) + ->as('parser') + , + chain + ( + token(T_FUNCTION) + , + parentheses()->as('args') + , + braces()->as('body') + ) + ->as('function') + , + string()->as('string') + , + rtoken('/^T_\w+·\w+$/')->as('token') + , + rtoken('/^T_\w+$/')->as('constant') + , + label()->as('label') + ) + , + token(',') + ) + ) + ->as('args') + , + commit + ( + token(')') + ) + , + optional + ( + rtoken('/^·\w+$/')->as('label'), null + ) + ) + ->as('parser') + ); + + $labelReference = + chain + ( + $label + , + optional + ( + chain + ( + token('{') + , + token('}') + , + $label + ) + , + null + ) + ->as('alias') + ) + ->as('reference') + ; + + $list = + chain + ( + $optionalModifier + , + token(T_LIST) + , + token('(') + , + future($sequence)->as('member') + , + token(',') + , + (clone $literal)->as('delimiter') + , + token(')') + ) + ->as('list') + ; + + $sequence = + repeat + ( + either + ( + $list + , + $parser + , + $labelReference + , + $constant + , + $literal + , + $commit + ) + ) + ->as('sequence') + ; + + $disjunction = ls($sequence, token('|'))->as('disjunction'); + + $rule = + commit + ( + chain + ( + $productionModifier + , + $label + , + $optionalModifier + , + either + ( + between + ( + token('{') + , + $sequence + , + token('}') + ) + , + between + ( + token('{') + , + $disjunction + , + token('}') + ) + ) + ) + ) + ->as('rule') + ; + + $grammar = + commit + ( + chain + ( + optional + ( + repeat($rule) + ) + ->as('rules') + ) + ) + 
; + + $grammarAst = $grammar->parse(TokenStream::fromSlice($tokens)); + + $productions = new Map; + + foreach ($grammarAst->{'* rules'}->list() as $ast) { + + $ruleAst = $ast->{'* rule'}; + $labelAst = $ast->{'* rule label'}; + $label = (string) $labelAst->token(); + + if ($ruleAst->{'production?'}) { + + $productions->add($label, $ruleAst); + + if ($productions->count() > 1) { + $this->fail( + self::E_GRAMMAR_MULTIPLE_PRODUCTIONS, + $line, + json_encode($productions->symbols(), self::PRETTY_PRINT) + ); + } + + continue; + } + + if ($this->unreached->contains($label)) + $this->fail(self::E_GRAMMAR_DUPLICATED_RULE_LABEL, $label, $labelAst->token()->line()); + + $this->unreached->add($label, $ruleAst); + } + + if ($productions->count() === 0) + $this->fail(self::E_GRAMMAR_MISSING_PRODUCTION, $line); + + $productionLabel = $productions->symbols()[0]; + + $this->scope->add($productionLabel); + + $pattern = $this->compilePattern($productions->get($productionLabel)); + + if ($this->unreached->count() > 0) { + $this->fail( + self::E_GRAMMAR_UNREACHABLE_NONTERMINAL, + $productionLabel, + $line, + json_encode($this->unreached->symbols(), self::PRETTY_PRINT) + ); + } + + if ($this->staged->count() > 0) { + $this->fail( + self::E_GRAMMAR_UNREACHABLE_NONTERMINAL, + $productionLabel, + $line, + json_encode($this->staged->symbols(), self::PRETTY_PRINT) + ); + } + + $this->specificity = $this->collected->count(); + + if (! $pattern->isFallible()) + $this->fail(self::E_GRAMMAR_NON_FALLIBLE, $productionLabel, $line); + + return $pattern; + } + + private function compilePattern(Ast $rule) : Parser { + + $label = (string) $rule->{'label'}; + + if (! ($sequence = $rule->{'* sequence'})->isEmpty()) + $pattern = $this->compileSequence($sequence, $label); + else if(! 
($disjunction = $rule->{'* disjunction'})->isEmpty()) + $pattern = $this->compileDisjunction($disjunction, $label); + else + assert(false, 'Unknown pattern definition.'); + + if ($rule->{'optional?'}) $pattern = optional($pattern); + + $pattern->as($label); + + return $pattern; + } + + private function compileSequence(Ast $sequence, string $label) : Parser { + $commit = false; + $this->staged->add($label); + $chain = []; + foreach ($sequence->list() as $step) { + foreach ($step->list() as $ast) { + $type = $ast->label(); + switch ($type) { + case 'literal': // matches double quoted like: '','' or ''use'' + $chain[] = token($ast->token()); + break; + case 'constant': // T_* + $chain[] = token(parent::lookupTokenType($ast->token())); + break; + case 'parser': + $chain[] = parent::compileParser($ast); + break; + case 'reference': + $refLabel = (string) $ast->{'label'}; + $link = $this->collected->get($refLabel); + if ($link === null) { + if ($this->staged->contains($refLabel)) { + $link = future($this->references[$refLabel]); + } + else { + $link = $this->compilePattern($this->unreached->get($refLabel)); + $this->references[$refLabel] = $link; + $this->collected->add($refLabel, $link); + $this->unreached->remove($refLabel); + } + } + + $link = (clone $link)->as((string) $ast->{'alias label'} ?: null); + + $chain[] = $link; + break; + case 'list': + $link = $this->compileSequence($ast->{'* member'}, $label); + $chain[] = optional(ls($link, token($ast->{'* delimiter'}->token()))); + break; + case 'commit': + $commit = true; + break; + default: + assert(false, 'Unknown sequence step.'); + break; + } + + if ($commit && ($length = count($chain)) > 0) $chain[$length-1] = commit(end($chain)); + } + } + + if (count($chain) > 1) { + $pattern = chain(...$chain); + $pattern->as($label); + } + else { + $pattern = $chain[0]; + } + + $this->staged->remove($label); + + return $pattern; + } + + private function compileDisjunction(Ast $disjunctions, string $label) : Parser { + + 
$this->staged->add($label); + + $chain = []; + foreach ($disjunctions->list() as $disjunction) { + foreach ($disjunction->list() as $sequence) { + $link = $this->compileSequence($sequence, $label); + $this->collected->add($label, $link); + $this->unreached->remove($label); + $chain[] = $link; + } + } + + $pattern = either(...$chain)->as($label); + + $this->staged->remove($label); + + return $pattern; + } +} diff --git a/src/Macro.php b/src/Macro.php index 8c92ebe..5809d76 100644 --- a/src/Macro.php +++ b/src/Macro.php @@ -17,7 +17,7 @@ class Macro implements Directive { protected static $_id = 0; - function __construct(Map $tags, Pattern $pattern, Expansion $expansion) { + function __construct(Map $tags, PatternInterface $pattern, Expansion $expansion) { $this->id = (__CLASS__)::$_id++; $this->tags = $tags; $this->pattern = $pattern; diff --git a/src/MacroMember.php b/src/MacroMember.php index 6ecee51..78e7370 100644 --- a/src/MacroMember.php +++ b/src/MacroMember.php @@ -4,6 +4,14 @@ abstract class MacroMember { + const + PRETTY_PRINT = + JSON_PRETTY_PRINT + | JSON_BIGINT_AS_STRING + | JSON_UNESCAPED_UNICODE + | JSON_UNESCAPED_SLASHES + ; + protected function fail(string $error, ...$args) { throw new YayParseError(sprintf($error, ...$args)); } diff --git a/src/Map.php b/src/Map.php index c0c02fa..dcdf002 100644 --- a/src/Map.php +++ b/src/Map.php @@ -2,7 +2,7 @@ namespace Yay; -class Map implements Context { +class Map implements Context, \Countable { protected $map = []; function get($key) { @@ -13,6 +13,10 @@ function add($key, $value = true) { return $this->map[$key] = $value; } + function remove($key) { + unset($this->map[$key]); + } + function contains($key) : bool { return isset($this->map[$key]); } @@ -21,6 +25,10 @@ function symbols() : array { return array_keys($this->map); } + function count() : int { + return count($this->map); + } + static function fromValues(array $values = []) : self { $m = self::fromEmpty(); foreach($values as $value) 
$m->add($value); diff --git a/src/Pattern.php b/src/Pattern.php index 4e115a5..9b625fa 100644 --- a/src/Pattern.php +++ b/src/Pattern.php @@ -2,7 +2,7 @@ namespace Yay; -class Pattern extends MacroMember { +class Pattern extends MacroMember implements PatternInterface { const E_BAD_CAPTURE = "Bad macro capture identifier '%s' on line %d.", @@ -95,8 +95,15 @@ private function compile(array $tokens) { rtoken('/^·this$/')->as('this') , label()->as('label') + , + between + ( + token(T_CONSTANT_ENCAPSED_STRING, "''"), + any(), + token(T_CONSTANT_ENCAPSED_STRING, "''") + ) + ->as('literal') ) - ->as('parser') , token(',') ) @@ -207,7 +214,7 @@ private function layer(string $start, string $end, Parser $parser, $cg) : Parser }); } - private function lookupTokenType(Token $token) : int { + protected function lookupTokenType(Token $token) : int { $type = explode('·', (string) $token)[0]; if (! defined($type)) $this->fail(self::E_BAD_TOKEN_TYPE, $type, $token->line()); @@ -215,7 +222,7 @@ private function lookupTokenType(Token $token) : int { return constant($type); } - protected function lookupCapture(Token $token) : string { + private function lookupCapture(Token $token) : string { $id = (string) $token; if ($id === '·_') return ''; diff --git a/src/PatternInterface.php b/src/PatternInterface.php new file mode 100644 index 0000000..fd9f2f5 --- /dev/null +++ b/src/PatternInterface.php @@ -0,0 +1,12 @@ +type = $type; $this->token = $token; + $this->stack = $token; $this->expected = new Expected($token); } @@ -259,6 +260,40 @@ protected function parser(TokenStream $ts, Parser $parser) /*: Result|null*/ { $ast = new Ast($this->label); + while( + (null !== $ts->current()) && + (($partial = $parser->parse($ts)) instanceof Ast) + ){ + $ast->push($partial); + } + + return $ast->isEmpty() ? ($partial ?? 
$this->error($ts)) : $ast; + } + + function expected() : Expected + { + return $this->stack[0]->expected(); + } + + function isFallible() : bool + { + return $this->stack[0]->isFallible(); + } + }; +} + +function set(Parser $parser) : Parser +{ + if (! $parser->isFallible()) + throw new InvalidArgumentException( + 'Infinite loop at ' . __FUNCTION__ . '('. $parser . '(*))'); + + return new class(__FUNCTION__, $parser) extends Parser + { + protected function parser(TokenStream $ts, Parser $parser) /*: Result|null*/ + { + $ast = new Ast($this->label); + while( (null !== $ts->current()) && (($partial = $parser->parse($ts)) instanceof Ast) @@ -283,7 +318,7 @@ function isFallible() : bool function between(Parser $a, Parser $b, Parser $c): Parser { - return new class(__FUNCTION__, $a, commit($b), commit($c)) extends Parser + return new class(__FUNCTION__, $a, $b, $c) extends Parser { protected function parser(TokenStream $ts, Parser $a, Parser $b, Parser $c) /*: Result|null*/ { @@ -399,9 +434,9 @@ function braces(): Parser ( token('{') , - layer() + commit(layer()) , - token('}') + commit(token('}')) ) ; } @@ -413,9 +448,9 @@ function brackets(): Parser ( token('[') , - layer() + commit(layer()) , - token(']') + commit(token(']')) ) ; } @@ -426,9 +461,9 @@ function parentheses(): Parser ( token('(') , - layer() + commit(layer()) , - token(')') + commit(token(')')) ) ; } @@ -503,7 +538,13 @@ protected function parser(TokenStream $ts, Parser ...$routes) /*: Result|null*/ $errors = []; foreach ($routes as $route) { if (($result = $route->parse($ts)) instanceof Ast) { - return $result->as($this->label); + if ($this->label && $route->label) { + $ret = new Ast($this->label); + $ret->append($result); + return $ret; + } + else + return $result->as($this->label); } if ($this->errorLevel === Error::ENABLED) { if ($errors) end($errors)->with($result); @@ -781,7 +822,13 @@ function future(&$parser) : Parser { protected function parser(TokenStream $ts, callable $delayed) /*: 
Result|null*/ { - $result = $delayed()->parse($ts); + $parser = $delayed(); + + if ($this->errorLevel === Error::ENABLED) + $parser->withErrorLevel($this->errorLevel); + + $result = $parser->parse($ts); + if ($result instanceof Ast) $result->as($this->label); return $result; diff --git a/tests/ParserTest.php b/tests/ParserTest.php index 48cf990..972ddb0 100644 --- a/tests/ParserTest.php +++ b/tests/ParserTest.php @@ -452,11 +452,11 @@ function testBetweenWithMiddleFailure(string $src) { $this->parseSuccess($ts, token(T_OPEN_TAG), "T_OPEN_TAG(parseHalt( $ts, - between( + commit(between( chain(token('{'), token('{')), chain(token(T_STRING), token(T_STRING)), chain(token('}'), token('}')) - ), + )), "Unexpected '~' on line 1, expected T_STRING()." ); } @@ -476,11 +476,11 @@ function testBetweenWithExitFailure(string $src, string $msg) { $this->parseSuccess($ts, token(T_OPEN_TAG), "T_OPEN_TAG(parseHalt( $ts, - between( + commit(between( chain(token('{'), token('{')), repeat(token(T_STRING)), chain(token('}'), token('}')) - ), + )), $msg ); } @@ -490,11 +490,11 @@ function testBetweenWithMiddleErrorWhenMiddleIsOptional() { $this->parseSuccess($ts, token(T_OPEN_TAG), "T_OPEN_TAG(parseHalt( $ts, - between( + commit(between( chain(token('{'), token('{')), optional(token(T_STRING)), chain(token('}'), token('}')) - ), + )), "Unexpected '~' on line 1, expected '}'." ); } @@ -504,11 +504,11 @@ function testBetweenWithMiddleHalt() { $this->parseSuccess($ts, token(T_OPEN_TAG), "T_OPEN_TAG(parseHalt( $ts, - between( + commit(between( chain(token('{'), token('{')), chain(token(T_STRING), token(T_STRING)), chain(token('}'), token('}')) - ), + )), "Unexpected '}' on line 1, expected T_STRING()." 
); } @@ -921,7 +921,7 @@ function baz(); ( either ( - repeat + set ( either ( @@ -952,8 +952,6 @@ function baz(); , token(';') ) - , - token('}') ) ) ->as('methods') diff --git a/tests/phpt/group_use_grammar.phpt b/tests/phpt/group_use_grammar.phpt new file mode 100644 index 0000000..39625e6 --- /dev/null +++ b/tests/phpt/group_use_grammar.phpt @@ -0,0 +1,44 @@ +--TEST-- +A proof of concept polyfill for group use --pretty-print +--FILE-- +> { + ·group_use ··· { + ·entries ··· { + ·entry ··· { + use ·type ·base\·name ·alias ···{as ·label}; + } + } + } +} + +use A\B\C\{ + Foo, + Foo\Bar, + Baz as Boo, + const X as Y, + function d\e as f +} + +?> +--EXPECTF-- + diff --git a/tests/phpt/json.phpt b/tests/phpt/json.phpt new file mode 100644 index 0000000..40b9549 --- /dev/null +++ b/tests/phpt/json.phpt @@ -0,0 +1,55 @@ +--TEST-- +Proof of concept native json support with PEG macro --pretty-print +--FILE-- +> { + JSON_MATCH +} + +json : { + 'a' : true, + 'b' : false, + 'c' : null, + 'd' : 'string', + 'e' : { + 'a' : true, + 'b' : false, + 'c' : null, + 'd' : 'string', + 'e' : { + 'f': {} + }, + 'f' : ['', {'g': {'h': {}}}, null, true, false, [], [1]] + } +}; + + +?> +--EXPECTF-- + From 1f4d4eed6f9e6e95d4d5b01d36e7e89ff5ea33ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Mon, 27 Feb 2017 01:57:45 -0300 Subject: [PATCH 16/16] adapt tests as order from unfolded ast is deterministic now --- tests/phpt/property_accessors.phpt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/phpt/property_accessors.phpt b/tests/phpt/property_accessors.phpt index 140d5f7..d5d3234 100644 --- a/tests/phpt/property_accessors.phpt +++ b/tests/phpt/property_accessors.phpt @@ -81,14 +81,14 @@ namespace App; class Sprocket { private $type { - get :string { - return $this->type; - } - set(string $value) { $this->type = $value; } + get :string { + return $this->type; + } + unset { $this->type = ''; }