Add scripts of CPython Tools/peg_generator/pegen to create JavaScript PEG parser. Related to issue #2354
1 parent 3ce0dd2 · commit 97b7e31
Showing 22 changed files with 5,613 additions and 9 deletions.
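The new `javascript` subcommand mirrors the existing C target's command line. A minimal invocation sketch, assuming the package is importable as `pegen` with the file below as its `__main__` module; the grammar and tokens file names are placeholders, not files named in this commit:

# Hedged sketch: drive the new CLI in-process. The module path and the
# grammar/tokens file names are assumptions, not part of this commit.
import sys
from pegen.__main__ import main

sys.argv = ["pegen", "-v", "javascript", "javascript.gram", "Tokens", "-o", "parse.js"]
main()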
@@ -0,0 +1,259 @@
#!/usr/bin/env python3.8

"""pegen -- PEG Generator.

Search the web for PEG Parsers for reference.
"""

import argparse
import sys
import time
import token
import traceback
from typing import Tuple

from pegen.build import Grammar, Parser, ParserGenerator, Tokenizer
from pegen.validator import validate_grammar


def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    from pegen.build import build_c_parser_and_generator

    verbose = args.verbose
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    try:
        grammar, parser, tokenizer, gen = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            verbose_tokenizer,
            verbose_parser,
            args.verbose,
            keep_asserts_in_extension=False if args.optimized else True,
            skip_actions=args.skip_actions,
        )
        return grammar, parser, tokenizer, gen
    except Exception as err:
        if args.verbose:
            raise  # Show traceback
        traceback.print_exception(err.__class__, err, None)
        sys.stderr.write("For full traceback, use -v\n")
        sys.exit(1)


def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    from pegen.build import build_python_parser_and_generator

    verbose = args.verbose
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    try:
        grammar, parser, tokenizer, gen = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            verbose_tokenizer,
            verbose_parser,
            skip_actions=args.skip_actions,
        )
        return grammar, parser, tokenizer, gen
    except Exception as err:
        if args.verbose:
            raise  # Show traceback
        traceback.print_exception(err.__class__, err, None)
        sys.stderr.write("For full traceback, use -v\n")
        sys.exit(1)


def generate_javascript_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    from pegen.build import build_javascript_parser_and_generator

    verbose = args.verbose
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    try:
        grammar, parser, tokenizer, gen = build_javascript_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            verbose_tokenizer,
            verbose_parser,
            args.verbose,
            keep_asserts_in_extension=False if args.optimized else True,
            skip_actions=args.skip_actions,
        )
        return grammar, parser, tokenizer, gen
    except Exception as err:
        if args.verbose:
            raise  # Show traceback
        traceback.print_exception(err.__class__, err, None)
        sys.stderr.write("For full traceback, use -v\n")
        sys.exit(1)


argparser = argparse.ArgumentParser(
    prog="pegen", description="Experimental PEG-like parser generator"
)
argparser.add_argument("-q", "--quiet", action="store_true", help="Don't print the parsed grammar")
argparser.add_argument(
    "-v",
    "--verbose",
    action="count",
    default=0,
    help="Print timing stats; repeat for more debug output",
)
subparsers = argparser.add_subparsers(help="target language for the generated code")

c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
c_parser.set_defaults(func=generate_c_code)
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
    "-o", "--output", metavar="OUT", default="parse.c", help="Where to write the generated parser"
)
c_parser.add_argument(
    "--compile-extension",
    action="store_true",
    help="Compile generated C code into an extension module",
)
c_parser.add_argument(
    "--optimized", action="store_true", help="Compile the extension in optimized mode"
)
c_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)

python_parser = subparsers.add_parser("python", help="Generate Python code")
python_parser.set_defaults(func=generate_python_code)
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.py",
    help="Where to write the generated parser",
)
python_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)

javascript_parser = subparsers.add_parser("javascript", help="Generate JavaScript code")
javascript_parser.set_defaults(func=generate_javascript_code)
javascript_parser.add_argument("grammar_filename", help="Grammar description")
javascript_parser.add_argument("tokens_filename", help="Tokens description")
javascript_parser.add_argument(
    "-o", "--output", metavar="OUT", default="parse.js", help="Where to write the generated parser"
)
javascript_parser.add_argument(
    "--compile-extension",
    action="store_true",
    help="Compile the generated code into an extension module",
)
javascript_parser.add_argument(
    "--optimized", action="store_true", help="Compile the extension in optimized mode"
)
javascript_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)


def main() -> None:
    from pegen.testutil import print_memstats

    args = argparser.parse_args()
    if "func" not in args:
        argparser.error("Must specify the target language mode ('c', 'python' or 'javascript')")

    t0 = time.time()
    grammar, parser, tokenizer, gen = args.func(args)
    t1 = time.time()

    validate_grammar(grammar)

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for line in repr(grammar).splitlines():
                print(" ", line)

        print("Clean Grammar:")
        for line in str(grammar).splitlines():
            print(" ", line)

    if args.verbose:
        print("First Graph:")
        for src, dsts in gen.first_graph.items():
            print(f" {src} -> {', '.join(dsts)}")
        print("First SCCS:")
        for scc in gen.first_sccs:
            print(" ", scc, end="")
            if len(scc) > 1:
                print(
                    " # Indirectly left-recursive; leaders:",
                    {name for name in scc if grammar.rules[name].leader},
                )
            else:
                name = next(iter(scc))
                if name in gen.first_graph[name]:
                    print(" # Left-recursive")
                else:
                    print()

    if args.verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Cache sizes:")
        print(f" token array : {len(tokenizer._tokens):10}")
        print(f" cache : {len(parser._cache):10}")
        if not print_memstats():
            print("(Can't find psutil; install it for memory stats.)")


if __name__ == "__main__":
    if sys.version_info < (3, 8):
        print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
        sys.exit(1)
    main()
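`build_javascript_parser_and_generator` is imported from `pegen.build`, whose body is not part of this excerpt. A sketch of its interface, inferred solely from the call site in `generate_javascript_code` above; the keyword names for the positional parameters are assumptions:

# Interface sketch only -- inferred from the call site, not the real
# implementation in pegen.build. Returns (grammar, parser, tokenizer, gen).
def build_javascript_parser_and_generator(
    grammar_filename,      # path to the grammar description
    tokens_filename,       # path to the tokens description
    output,                # where the generated parse.js is written
    compile_extension,     # whether to compile an extension module
    verbose_tokenizer=False,
    verbose_parser=False,
    verbose=0,
    keep_asserts_in_extension=True,
    skip_actions=False,
):
    ...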
@@ -0,0 +1,71 @@
""" | ||
Copy-parse of ast.dump, removing the `isinstance` checks. This is needed, | ||
because testing pegen requires generating a C extension module, which contains | ||
a copy of the symbols defined in Python-ast.c. Thus, the isinstance check would | ||
always fail. We rely on string comparison of the base classes instead. | ||
TODO: Remove the above-described hack. | ||
""" | ||

from typing import Any, Optional, Tuple


def ast_dump(
    node: Any,
    annotate_fields: bool = True,
    include_attributes: bool = False,
    *,
    indent: Optional[str] = None,
) -> str:
    def _format(node: Any, level: int = 0) -> Tuple[str, bool]:
        if indent is not None:
            level += 1
            prefix = "\n" + indent * level
            sep = ",\n" + indent * level
        else:
            prefix = ""
            sep = ", "
        if any(cls.__name__ == "AST" for cls in node.__class__.__mro__):
            cls = type(node)
            args = []
            allsimple = True
            keywords = annotate_fields
            for name in node._fields:
                try:
                    value = getattr(node, name)
                except AttributeError:
                    keywords = True
                    continue
                if value is None and getattr(cls, name, ...) is None:
                    keywords = True
                    continue
                value, simple = _format(value, level)
                allsimple = allsimple and simple
                if keywords:
                    args.append("%s=%s" % (name, value))
                else:
                    args.append(value)
            if include_attributes and node._attributes:
                for name in node._attributes:
                    try:
                        value = getattr(node, name)
                    except AttributeError:
                        continue
                    if value is None and getattr(cls, name, ...) is None:
                        continue
                    value, simple = _format(value, level)
                    allsimple = allsimple and simple
                    args.append("%s=%s" % (name, value))
            if allsimple and len(args) <= 3:
                return "%s(%s)" % (node.__class__.__name__, ", ".join(args)), not args
            return "%s(%s%s)" % (node.__class__.__name__, prefix, sep.join(args)), False
        elif isinstance(node, list):
            if not node:
                return "[]", True
            return "[%s%s]" % (prefix, sep.join(_format(x, level)[0] for x in node)), False
        return repr(node), True

    if all(cls.__name__ != "AST" for cls in node.__class__.__mro__):
        raise TypeError("expected AST, got %r" % node.__class__.__name__)
    if indent is not None and not isinstance(indent, str):
        indent = " " * indent
    return _format(node)[0]
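A brief usage sketch (not part of the commit), applying the helper to a node built with the stdlib ast module:

# Hedged usage sketch: the stdlib ast.AST base class is named "AST", so
# stdlib nodes pass the name-based check above.
import ast

tree = ast.parse("x = 1 + 2")
print(ast_dump(tree, indent="  "))  # prints a formatted Module(...) tree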