Skip to content

Commit

Permalink
Improve PEG scripts, parses big Python scripts such as _pydecimal.py.…
Browse files Browse the repository at this point in the history
… Related to issue #2354
  • Loading branch information
PierreQuentel committed Jan 7, 2024
1 parent 52aa87c commit d3f7de9
Show file tree
Hide file tree
Showing 8 changed files with 1,222 additions and 20 deletions.
72 changes: 72 additions & 0 deletions scripts/Tokens
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
ENDMARKER
NAME
NUMBER
STRING
NEWLINE
INDENT
DEDENT

LPAR '('
RPAR ')'
LSQB '['
RSQB ']'
COLON ':'
COMMA ','
SEMI ';'
PLUS '+'
MINUS '-'
STAR '*'
SLASH '/'
VBAR '|'
AMPER '&'
LESS '<'
GREATER '>'
EQUAL '='
DOT '.'
PERCENT '%'
LBRACE '{'
RBRACE '}'
EQEQUAL '=='
NOTEQUAL '!='
LESSEQUAL '<='
GREATEREQUAL '>='
TILDE '~'
CIRCUMFLEX '^'
LEFTSHIFT '<<'
RIGHTSHIFT '>>'
DOUBLESTAR '**'
PLUSEQUAL '+='
MINEQUAL '-='
STAREQUAL '*='
SLASHEQUAL '/='
PERCENTEQUAL '%='
AMPEREQUAL '&='
VBAREQUAL '|='
CIRCUMFLEXEQUAL '^='
LEFTSHIFTEQUAL '<<='
RIGHTSHIFTEQUAL '>>='
DOUBLESTAREQUAL '**='
DOUBLESLASH '//'
DOUBLESLASHEQUAL '//='
AT '@'
ATEQUAL '@='
RARROW '->'
ELLIPSIS '...'
COLONEQUAL ':='
EXCLAMATION '!'

OP
AWAIT
ASYNC
TYPE_IGNORE
TYPE_COMMENT
SOFT_KEYWORD
FSTRING_START
FSTRING_MIDDLE
FSTRING_END
COMMENT
NL
ERRORTOKEN

# These aren't used by the C tokenizer but are needed for tokenize.py
ENCODING
37 changes: 35 additions & 2 deletions scripts/adapt_grammar_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,15 @@ def transform_action(action):
#action9 = re.sub(operators_re, r'$B.ast.\1', action8)
action9 = re.sub(r'([a-z]+)_ty\b', r'$B.ast.\1', action8)

for op in ['USub', 'Add', 'Sub', 'Module']:
action9 = re.sub(rf'\b{op}\b', 'new $B.ast.' + op, action9)
for name in operators + ['Module']:
action9 = re.sub(rf'\b{name}\b', 'new $B.ast.' + name, action9)

for name in helper_functions:
action9 = re.sub(rf'\b{name}\b', '$B.helper_functions.' + name, action9)

for name in parser_constants:
action9 = re.sub(rf'\b{name}\b', '$B.parser_constants.' + name, action9)

# remove parameter types, eg
# "$B._PyPegen.joined_str(p, a, (asdl_expr_seq)b, c)"
# replaced by
Expand Down Expand Up @@ -75,6 +81,33 @@ def transform_action(action):

operators_re = r'\b(' + '|'.join(operators) + r')\b'

# Names that transform_action() prefixes with "$B.parser_constants." when
# they appear as whole words in a grammar action.
# NOTE(review): the "*_ty" entries look like they are already rewritten to
# "$B.ast.<name>" by the earlier `([a-z]+)_ty\b` substitution - confirm
# whether they can still match here.
parser_constants = [
    # plain capitalised / upper-case names
    "Store",
    "Load",
    "Del",
    "NULL",
    # "*_ty" names
    "alias_ty",
    "keyword_ty",
    "arguments_ty",
    "expr_ty",
    # "asdl_*_seq" names
    "asdl_stmt_seq",
    "asdl_int_seq",
    "asdl_expr_seq",
    "asdl_keyword_seq",
    "asdl_identifier_seq",
    "asdl_pattern_seq",
    "asdl_type_param_seq",
    "AugOperator",
    # "Py*" names
    "Py_Ellipsis",
    "Py_False",
    "Py_True",
    "Py_None",
    "PyExc_SyntaxError",
    # "*_TARGETS" names
    "STAR_TARGETS",
    "DEL_TARGETS",
    "FOR_TARGETS",
]

# Names that transform_action() prefixes with "$B.helper_functions." when
# they appear as whole words in a grammar action. Matching uses \b on both
# sides, and "_" counts as a word character, so a short name such as "CHECK"
# does not match inside a longer one such as "CHECK_VERSION".
helper_functions = [
    # "CHECK*" names
    'CHECK',
    'CHECK_VERSION',
    'CHECK_NULL_ALLOWED',
    'INVALID_VERSION_CHECK',
    'NEW_TYPE_COMMENT',
    # "RAISE_*" names
    'RAISE_ERROR_KNOWN_LOCATION',
    'RAISE_SYNTAX_ERROR',
    'RAISE_INDENTATION_ERROR',
    'RAISE_SYNTAX_ERROR_KNOWN_LOCATION',
    'RAISE_SYNTAX_ERROR_KNOWN_RANGE',
    'RAISE_SYNTAX_ERROR_INVALID_TARGET',
    '_RAISE_SYNTAX_ERROR_INVALID_TARGET',
    'RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN',
    # "asdl_seq_*" names
    'asdl_seq_LEN',
    'asdl_seq_GET',
]

if __name__ == '__main__':
src = """_PyAST_alias(a->v.Name.id,
(b) ? ((expr_ty) b)->v.Name.id : NULL,
Expand Down
4 changes: 2 additions & 2 deletions scripts/make_javascript_gen_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
grammar_file = f'python{vnum}.gram'

dest = os.path.join(os.path.dirname(os.getcwd()),
'www', 'src', 'gen_parseXXX.js')
'www', 'src', 'gen_parse.js')

os.system(f'python -m pegen javascript d:/cpython/Grammar/python.gram d:/cpython/Grammar/Tokens ' +
os.system(f'python -m pegen javascript {grammar_file} Tokens ' +
f'-o {dest}')
2 changes: 1 addition & 1 deletion scripts/pegen/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def build_javascript_generator(
gen: ParserGenerator = JavascriptParserGenerator(
grammar, all_tokens, exact_tok, non_exact_tok, file,
skip_actions=skip_actions,
debug = True
debug = False
)
gen.generate(grammar_file)

Expand Down
17 changes: 6 additions & 11 deletions scripts/pegen/javascript_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,12 @@

EXTENSION_SUFFIX = """
$B._PyPegen_parse = function(p){
console.log('parse', p)
// Initialize keywords
p.keywords = reserved_keywords;
p.n_keyword_lists = n_keyword_lists;
p.soft_keywords = soft_keywords;
console.log('first token', p.tok.next().value)
// skip first token (ENCODING)
p.tok.next()
return file_rule(p)
Expand Down Expand Up @@ -611,21 +610,17 @@ def _set_up_token_start_metadata_extraction(self) -> None:
self.print("p.error_indicator = 1;")
self.add_return("NULL")
self.print("}")
self.print("var _start_lineno = p.tokens[_mark].lineno;")
self.print("UNUSED(_start_lineno); // Only used by EXTRA macro")
self.print("var _start_col_offset = p.tokens[_mark].col_offset;")
self.print("UNUSED(_start_col_offset); // Only used by EXTRA macro")
self.print("EXTRA.lineno = p.tokens[_mark].lineno;")
self.print("EXTRA.col_offset = p.tokens[_mark].col_offset;")

def _set_up_token_end_metadata_extraction(self) -> None:
self.print("var _token = $B._PyPegen.get_last_nonnwhitespace_token(p);")
self.print("if (_token == NULL) {")
with self.indent():
self.add_return("NULL")
self.print("}")
self.print("var _end_lineno = _token.end_lineno;")
self.print("UNUSED(_end_lineno); // Only used by EXTRA macro")
self.print("var _end_col_offset = _token.end_col_offset;")
self.print("UNUSED(_end_col_offset); // Only used by EXTRA macro")
self.print("EXTRA.end_lineno = _token.end_lineno;")
self.print("EXTRA.end_col_offset = _token.end_col_offset;")

def _check_for_errors(self) -> None:
self.print("if (p.error_indicator) {")
Expand Down
7 changes: 3 additions & 4 deletions www/src/action_helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -185,19 +185,19 @@ $B._PyPegen.constant_from_string = function(p, token){
}
}
var ast_obj = new $B.ast.Constant(value)
set_position_from_obj(ast_obj, p.arena)
set_position_from_token(ast_obj, token)
return ast_obj
}

$B._PyPegen.constant_from_token = function(p, t){
var ast_obj = new $B.ast.Constant(t.string)
set_position_from_obj(ast_obj, p.arena)
set_position_from_token(ast_obj, t)
return ast_obj
}

$B._PyPegen.decoded_constant_from_token = function(p, t){
var ast_obj = new $B.ast.Constant(t.string)
set_position_from_obj(ast_obj, p.arena)
set_position_from_token(ast_obj, t)
return ast_obj
}

Expand Down Expand Up @@ -710,7 +710,6 @@ $B._PyPegen.collect_call_seqs = function(p, a, b,
if (b == NULL) {
return $B._PyAST.Call($B._PyPegen.dummy_name(p), a, [], lineno, col_offset,
end_lineno, end_col_offset, arena);

}

var starreds = $B._PyPegen.seq_extract_starred_exprs(p, b),
Expand Down
Loading

0 comments on commit d3f7de9

Please sign in to comment.