diff --git a/src/Ast.php b/src/Ast.php index 2795a04..de40bab 100644 --- a/src/Ast.php +++ b/src/Ast.php @@ -130,12 +130,16 @@ function isEmpty() : bool { return !\count($this->ast) || null === $this->ast; } - function as(/*string|null*/ $label = null) : self { - if (null !== $label && null === $this->label) $this->label = $label; + function as(string $label = null) : self { + if (null !== $label) $this->label = $label; return $this; } + function label() { + return $this->label; + } + function withParent(self $parent) : self { $this->parent = $parent; diff --git a/src/Engine.php b/src/Engine.php index 96773fa..ecb0217 100644 --- a/src/Engine.php +++ b/src/Engine.php @@ -105,7 +105,14 @@ function __construct() { ->onCommit(function(Ast $macroAst) { $scope = Map::fromEmpty(); $tags = Map::fromValues(array_map('strval', $macroAst->{'tags'})); - $pattern = new Pattern($macroAst->{'declaration'}->line(), $macroAst->{'body pattern'}, $tags, $scope); + + if ($tags->contains('·grammar')) { + $pattern = new GrammarPattern($macroAst->{'declaration'}->line(), $macroAst->{'body pattern'}, $tags, $scope); + } + else { + $pattern = new Pattern($macroAst->{'declaration'}->line(), $macroAst->{'body pattern'}, $tags, $scope); + } + $expansion = new Expansion($macroAst->{'body expansion'}, $tags, $scope); $macro = new Macro($tags, $pattern, $expansion); diff --git a/src/Expansion.php b/src/Expansion.php index 6aaebcb..cf0c5d4 100644 --- a/src/Expansion.php +++ b/src/Expansion.php @@ -6,14 +6,6 @@ class Expansion extends MacroMember { - const - PRETTY_PRINT = - JSON_PRETTY_PRINT - | JSON_BIGINT_AS_STRING - | JSON_UNESCAPED_UNICODE - | JSON_UNESCAPED_SLASHES - ; - const E_BAD_EXPANSION = "Bad macro expansion identifier '%s' on line %d.", E_BAD_EXPANDER = "Bad macro expander '%s' on line %d.", diff --git a/src/GrammarPattern.php b/src/GrammarPattern.php new file mode 100644 index 0000000..e194d4c --- /dev/null +++ b/src/GrammarPattern.php @@ -0,0 +1,393 @@ +fail(self::E_EMPTY_PATTERN, $line); + + $this->scope = $scope; + $this->unreached = new Map; + $this->staged = new Map; + $this->collected = new Map; + $this->pattern = $this->compile($line, $pattern); + } + + private function compile(int $line, array $tokens) { + + $label = rtoken('/^·\w+$/')->as('label'); + + $doubleQuotes = token(T_CONSTANT_ENCAPSED_STRING, "''"); + + $commit = chain(token('!'), token('!'))->as('commit'); + + $literal = between($doubleQuotes, any(), $doubleQuotes)->as('literal'); + + $constant = rtoken('/^T_\w+$/')->as('constant'); + + $optionalModifier = optional(token('?'), false)->as('optional?'); + + $productionModifier = optional(token(T_SL), false)->as('production?'); + + $parser = ( + chain + ( + rtoken('/^·\w+$/')->as('type') + , + token('(') + , + optional + ( + ls + ( + either + ( + future + ( + $parser // recursion !!! + ) + ->as('parser') + , + chain + ( + token(T_FUNCTION) + , + parentheses()->as('args') + , + braces()->as('body') + ) + ->as('function') + , + string()->as('string') + , + rtoken('/^T_\w+·\w+$/')->as('token') + , + rtoken('/^T_\w+$/')->as('constant') + , + label()->as('label') + ) + , + token(',') + ) + ) + ->as('args') + , + commit + ( + token(')') + ) + , + optional + ( + rtoken('/^·\w+$/')->as('label'), null + ) + ) + ->as('parser') + ); + + $labelReference = + chain + ( + $label + , + optional + ( + chain + ( + token('{') + , + token('}') + , + $label + ) + , + null + ) + ->as('alias') + ) + ->as('reference') + ; + + $list = + chain + ( + $optionalModifier + , + token(T_LIST) + , + token('(') + , + future($sequence)->as('member') + , + token(',') + , + (clone $literal)->as('delimiter') + , + token(')') + ) + ->as('list') + ; + + $sequence = + repeat + ( + either + ( + $list + , + $parser + , + $labelReference + , + $constant + , + $literal + , + $commit + ) + ) + ->as('sequence') + ; + + $disjunction = ls($sequence, token('|'))->as('disjunction'); + + $rule = + commit + ( + chain + ( + $productionModifier + , + $label + , + $optionalModifier + , + either + ( + between + ( + token('{') + , + $sequence + , + token('}') + ) + , + between + ( + token('{') + , + $disjunction + , + token('}') + ) + ) + ) + ) + ->as('rule') + ; + + $grammar = + commit + ( + chain + ( + optional + ( + repeat($rule) + ) + ->as('rules') + ) + ) + ; + + $grammarAst = $grammar->parse(TokenStream::fromSlice($tokens)); + + $productions = new Map; + + foreach ($grammarAst->{'* rules'}->list() as $ast) { + + $ruleAst = $ast->{'* rule'}; + $labelAst = $ast->{'* rule label'}; + $label = (string) $labelAst->token(); + + if ($ruleAst->{'production?'}) { + + $productions->add($label, $ruleAst); + + if ($productions->count() > 1) { + $this->fail( + self::E_GRAMMAR_MULTIPLE_PRODUCTIONS, + $line, + json_encode($productions->symbols(), self::PRETTY_PRINT) + ); + } + + continue; + } + + if ($this->unreached->contains($label)) + $this->fail(self::E_GRAMMAR_DUPLICATED_RULE_LABEL, $label, $labelAst->token()->line()); + + $this->unreached->add($label, $ruleAst); + } + + if ($productions->count() === 0) + $this->fail(self::E_GRAMMAR_MISSING_PRODUCTION, $line); + + $productionLabel = $productions->symbols()[0]; + + $this->scope->add($productionLabel); + + $pattern = $this->compilePattern($productions->get($productionLabel)); + + if ($this->unreached->count() > 0) { + $this->fail( + self::E_GRAMMAR_UNREACHABLE_NONTERMINAL, + $productionLabel, + $line, + json_encode($this->unreached->symbols(), self::PRETTY_PRINT) + ); + } + + if ($this->staged->count() > 0) { + $this->fail( + self::E_GRAMMAR_UNREACHABLE_NONTERMINAL, + $productionLabel, + $line, + json_encode($this->staged->symbols(), self::PRETTY_PRINT) + ); + } + + $this->specificity = $this->collected->count(); + + if (! $pattern->isFallible()) + $this->fail(self::E_GRAMMAR_NON_FALLIBLE, $productionLabel, $line); + + return $pattern; + } + + private function compilePattern(Ast $rule) : Parser { + + $label = (string) $rule->{'label'}; + + if (! ($sequence = $rule->{'* sequence'})->isEmpty()) + $pattern = $this->compileSequence($sequence, $label); + else if(! ($disjunction = $rule->{'* disjunction'})->isEmpty()) + $pattern = $this->compileDisjunction($disjunction, $label); + else + assert(false, 'Unknown pattern definition.'); + + if ($rule->{'optional?'}) $pattern = optional($pattern); + + $pattern->as($label); + + return $pattern; + } + + private function compileSequence(Ast $sequence, string $label) : Parser { + $commit = false; + $this->staged->add($label); + $chain = []; + foreach ($sequence->list() as $step) { + foreach ($step->list() as $ast) { + $type = $ast->label(); + switch ($type) { + case 'literal': // matches double quoted like: '','' or ''use'' + $chain[] = token($ast->token()); + break; + case 'constant': // T_* + $chain[] = token(parent::lookupTokenType($ast->token())); + break; + case 'parser': + $chain[] = parent::compileParser($ast); + break; + case 'reference': + $refLabel = (string) $ast->{'label'}; + $link = $this->collected->get($refLabel); + if ($link === null) { + if ($this->staged->contains($refLabel)) { + $link = future($this->references[$refLabel]); + } + else { + $link = $this->compilePattern($this->unreached->get($refLabel)); + $this->references[$refLabel] = $link; + $this->collected->add($refLabel, $link); + $this->unreached->remove($refLabel); + } + } + + $link = (clone $link)->as((string) $ast->{'alias label'} ?: null); + + $chain[] = $link; + break; + case 'list': + $link = $this->compileSequence($ast->{'* member'}, $label); + $chain[] = optional(ls($link, token($ast->{'* delimiter'}->token()))); + break; + case 'commit': + $commit = true; + break; + default: + assert(false, 'Unknown sequence step.'); + break; + } + + if ($commit && ($length = count($chain)) > 0) $chain[$length-1] = commit(end($chain)); + } + } + + if (count($chain) > 1) { + $pattern = chain(...$chain); + $pattern->as($label); + } + else { + $pattern = $chain[0]; + } + + $this->staged->remove($label); + + return $pattern; + } + + private function compileDisjunction(Ast $disjunctions, string $label) : Parser { + + $this->staged->add($label); + + $chain = []; + foreach ($disjunctions->list() as $disjunction) { + foreach ($disjunction->list() as $sequence) { + $link = $this->compileSequence($sequence, $label); + $this->collected->add($label, $link); + $this->unreached->remove($label); + $chain[] = $link; + } + } + + $pattern = either(...$chain)->as($label); + + $this->staged->remove($label); + + return $pattern; + } +} diff --git a/src/Macro.php b/src/Macro.php index 8c92ebe..5809d76 100644 --- a/src/Macro.php +++ b/src/Macro.php @@ -17,7 +17,7 @@ class Macro implements Directive { protected static $_id = 0; - function __construct(Map $tags, Pattern $pattern, Expansion $expansion) { + function __construct(Map $tags, PatternInterface $pattern, Expansion $expansion) { $this->id = (__CLASS__)::$_id++; $this->tags = $tags; $this->pattern = $pattern; diff --git a/src/MacroMember.php b/src/MacroMember.php index 6ecee51..78e7370 100644 --- a/src/MacroMember.php +++ b/src/MacroMember.php @@ -4,6 +4,14 @@ abstract class MacroMember { + const + PRETTY_PRINT = + JSON_PRETTY_PRINT + | JSON_BIGINT_AS_STRING + | JSON_UNESCAPED_UNICODE + | JSON_UNESCAPED_SLASHES + ; + protected function fail(string $error, ...$args) { throw new YayParseError(sprintf($error, ...$args)); } diff --git a/src/Map.php b/src/Map.php index c0c02fa..dcdf002 100644 --- a/src/Map.php +++ b/src/Map.php @@ -2,7 +2,7 @@ namespace Yay; -class Map implements Context { +class Map implements Context, \Countable { protected $map = []; function get($key) { @@ -13,6 +13,10 @@ function add($key, $value = true) { return $this->map[$key] = $value; } + function remove($key) { + unset($this->map[$key]); + } + function contains($key) : bool { return isset($this->map[$key]); } @@ -21,6 +25,10 @@ function symbols() : array { return array_keys($this->map); } + function count() : int { + return count($this->map); + } + static function fromValues(array $values = []) : self { $m = self::fromEmpty(); foreach($values as $value) $m->add($value); diff --git a/src/Pattern.php b/src/Pattern.php index 4e115a5..9b625fa 100644 --- a/src/Pattern.php +++ b/src/Pattern.php @@ -2,7 +2,7 @@ namespace Yay; -class Pattern extends MacroMember { +class Pattern extends MacroMember implements PatternInterface { const E_BAD_CAPTURE = "Bad macro capture identifier '%s' on line %d.", @@ -95,8 +95,15 @@ private function compile(array $tokens) { rtoken('/^·this$/')->as('this') , label()->as('label') + , + between + ( + token(T_CONSTANT_ENCAPSED_STRING, "''"), + any(), + token(T_CONSTANT_ENCAPSED_STRING, "''") + ) + ->as('literal') ) - ->as('parser') , token(',') ) @@ -207,7 +214,7 @@ private function layer(string $start, string $end, Parser $parser, $cg) : Parser }); } - private function lookupTokenType(Token $token) : int { + protected function lookupTokenType(Token $token) : int { $type = explode('·', (string) $token)[0]; if (! defined($type)) $this->fail(self::E_BAD_TOKEN_TYPE, $type, $token->line()); @@ -215,7 +222,7 @@ private function lookupTokenType(Token $token) : int { return constant($type); } - protected function lookupCapture(Token $token) : string { + private function lookupCapture(Token $token) : string { $id = (string) $token; if ($id === '·_') return ''; diff --git a/src/PatternInterface.php b/src/PatternInterface.php new file mode 100644 index 0000000..fd9f2f5 --- /dev/null +++ b/src/PatternInterface.php @@ -0,0 +1,12 @@ +type = $type; $this->token = $token; + $this->stack = $token; $this->expected = new Expected($token); } @@ -258,6 +259,40 @@ protected function parser(TokenStream $ts, Parser $parser) /*: Result|null*/ { $ast = new Ast($this->label); + while( + (null !== $ts->current()) && + (($partial = $parser->parse($ts)) instanceof Ast) + ){ + $ast->push($partial); + } + + return $ast->isEmpty() ? ($partial ?? $this->error($ts)) : $ast; + } + + function expected() : Expected + { + return $this->stack[0]->expected(); + } + + function isFallible() : bool + { + return $this->stack[0]->isFallible(); + } + }; +} + +function set(Parser $parser) : Parser +{ + if (! $parser->isFallible()) + throw new InvalidArgumentException( + 'Infinite loop at ' . __FUNCTION__ . '('. $parser . '(*))'); + + return new class(__FUNCTION__, $parser) extends Parser + { + protected function parser(TokenStream $ts, Parser $parser) /*: Result|null*/ + { + $ast = new Ast($this->label); + while( (null !== $ts->current()) && (($partial = $parser->parse($ts)) instanceof Ast) @@ -282,7 +317,7 @@ function isFallible() : bool function between(Parser $a, Parser $b, Parser $c): Parser { - return new class(__FUNCTION__, $a, commit($b), commit($c)) extends Parser + return new class(__FUNCTION__, $a, $b, $c) extends Parser { protected function parser(TokenStream $ts, Parser $a, Parser $b, Parser $c) /*: Result|null*/ { @@ -398,9 +433,9 @@ function braces(): Parser ( token('{') , - layer() + commit(layer()) , - token('}') + commit(token('}')) ) ; } @@ -412,9 +447,9 @@ function brackets(): Parser ( token('[') , - layer() + commit(layer()) , - token(']') + commit(token(']')) ) ; } @@ -425,9 +460,9 @@ function parentheses(): Parser ( token('(') , - layer() + commit(layer()) , - token(')') + commit(token(')')) ) ; } @@ -502,7 +537,13 @@ protected function parser(TokenStream $ts, Parser ...$routes) /*: Result|null*/ $errors = []; foreach ($routes as $route) { if (($result = $route->parse($ts)) instanceof Ast) { - return $result->as($this->label); + if ($this->label && $route->label) { + $ret = new Ast($this->label); + $ret->append($result); + return $ret; + } + else + return $result->as($this->label); } if ($this->errorLevel === Error::ENABLED) { if ($errors) end($errors)->with($result); @@ -780,7 +821,13 @@ function future(&$parser) : Parser { protected function parser(TokenStream $ts, callable $delayed) /*: Result|null*/ { - $result = $delayed()->parse($ts); + $parser = $delayed(); + + if ($this->errorLevel === Error::ENABLED) + $parser->withErrorLevel($this->errorLevel); + + $result = $parser->parse($ts); + if ($result instanceof Ast) $result->as($this->label); return $result; diff --git a/tests/ParserTest.php b/tests/ParserTest.php index 51da5b9..799742b 100644 --- a/tests/ParserTest.php +++ b/tests/ParserTest.php @@ -426,11 +426,11 @@ function testBetweenWithMiddleFailure(string $src) { $this->parseSuccess($ts, token(T_OPEN_TAG), "T_OPEN_TAG(parseHalt( $ts, - between( + commit(between( chain(token('{'), token('{')), chain(token(T_STRING), token(T_STRING)), chain(token('}'), token('}')) - ), + )), "Unexpected '~' on line 1, expected T_STRING()." ); } @@ -450,11 +450,11 @@ function testBetweenWithExitFailure(string $src, string $msg) { $this->parseSuccess($ts, token(T_OPEN_TAG), "T_OPEN_TAG(parseHalt( $ts, - between( + commit(between( chain(token('{'), token('{')), repeat(token(T_STRING)), chain(token('}'), token('}')) - ), + )), $msg ); } @@ -464,11 +464,11 @@ function testBetweenWithMiddleErrorWhenMiddleIsOptional() { $this->parseSuccess($ts, token(T_OPEN_TAG), "T_OPEN_TAG(parseHalt( $ts, - between( + commit(between( chain(token('{'), token('{')), optional(token(T_STRING)), chain(token('}'), token('}')) - ), + )), "Unexpected '~' on line 1, expected '}'." ); } @@ -478,11 +478,11 @@ function testBetweenWithMiddleHalt() { $this->parseSuccess($ts, token(T_OPEN_TAG), "T_OPEN_TAG(parseHalt( $ts, - between( + commit(between( chain(token('{'), token('{')), chain(token(T_STRING), token(T_STRING)), chain(token('}'), token('}')) - ), + )), "Unexpected '}' on line 1, expected T_STRING()." ); } @@ -895,7 +895,7 @@ function baz(); ( either ( - repeat + set ( either ( @@ -926,8 +926,6 @@ function baz(); , token(';') ) - , - token('}') ) ) ->as('methods') diff --git a/tests/phpt/group_use_grammar.phpt b/tests/phpt/group_use_grammar.phpt new file mode 100644 index 0000000..39625e6 --- /dev/null +++ b/tests/phpt/group_use_grammar.phpt @@ -0,0 +1,44 @@ +--TEST-- +A proof of concept polyfill for group use --pretty-print +--FILE-- +> { + ·group_use ··· { + ·entries ··· { + ·entry ··· { + use ·type ·base\·name ·alias ···{as ·label}; + } + } + } +} + +use A\B\C\{ + Foo, + Foo\Bar, + Baz as Boo, + const X as Y, + function d\e as f +} + +?> +--EXPECTF-- + diff --git a/tests/phpt/json.phpt b/tests/phpt/json.phpt new file mode 100644 index 0000000..40b9549 --- /dev/null +++ b/tests/phpt/json.phpt @@ -0,0 +1,55 @@ +--TEST-- +Proof of concept native json support with PEG macro --pretty-print +--FILE-- +> { + JSON_MATCH +} + +json : { + 'a' : true, + 'b' : false, + 'c' : null, + 'd' : 'string', + 'e' : { + 'a' : true, + 'b' : false, + 'c' : null, + 'd' : 'string', + 'e' : { + 'f': {} + }, + 'f' : ['', {'g': {'h': {}}}, null, true, false, [], [1]] + } +}; + + +?> +--EXPECTF-- +