Skip to content

Commit

Permalink
Improve generated PEG parser. Related to issue #2354.
Browse files (browse the repository at this point in the history)
  • Loading branch information
PierreQuentel committed Jan 20, 2024
1 parent 6185569 commit 643b343
Show file tree
Hide file tree
Showing 11 changed files with 113 additions and 28 deletions.
21 changes: 12 additions & 9 deletions scripts/pegen/javascript_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,12 @@
// skip first token (ENCODING)
p.tok.next()
return file_rule(p)
switch(p.mode){
case 'file':
return file_rule(p)
case 'eval':
return eval_rule(p)
}
}
"""
Expand Down Expand Up @@ -577,20 +582,18 @@ def _setup_keywords(self) -> None:
)
self.print(f"const n_keyword_lists = {n_keyword_lists};")
groups = self._group_keywords_by_length()
self.print("const reserved_keywords = {")
self.print("const _reserved_keywords = {")
with self.indent():
num_groups = max(groups) + 1 if groups else 1
for keywords_length in range(num_groups):
if keywords_length not in groups.keys():
self.print("NULL: -1,")
else:
# self.print("(KeywordToken[]) {")
# with self.indent():
if keywords_length in groups.keys():
for keyword_str, keyword_type in groups[keywords_length]:
self.print(f'{keyword_str}: {keyword_type},')
# self.print("{NULL, -1},")
# self.print("},")
self.print("};")
self.print("const reserved_keywords = Object.create(null)")
self.print("for(var item of Object.entries(_reserved_keywords)){")
self.print(" reserved_keywords[item[0]] = item[1]")
self.print("}")

def _setup_soft_keywords(self) -> None:
soft_keywords = sorted(self.soft_keywords)
Expand Down
2 changes: 1 addition & 1 deletion www/src/action_helpers_generated_version.js
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ $B._PyPegen.formatted_value = function(p,
var formatted_value = new $B.ast.FormattedValue(expression,
conversion_val,
format === undefined ? format : format.result)
set_position_from_obj(formatted_value, p.arena)
set_position_from_obj(formatted_value, arena)
if(debug){
var debug_end_line,
debug_end_offset,
Expand Down
6 changes: 5 additions & 1 deletion www/src/ast_to_js.js
Original file line number Diff line number Diff line change
Expand Up @@ -1540,7 +1540,11 @@ $B.ast.Constant.prototype.to_js = function(){
}else if(this.value.__class__ === _b_.bytes){
return `_b_.bytes.$factory([${this.value.source}])`
}else if(typeof this.value == "number"){
return this.value
if(Number.isInteger(this.value)){
return this.value
}else{
return `({__class__: _b_.float, value: ${this.value}})`
}
}else if(this.value.__class__ === $B.long_int){
return `$B.fast_long_int(${this.value.value}n)`
}else if(this.value.__class__ === _b_.float){
Expand Down
15 changes: 11 additions & 4 deletions www/src/gen_parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,7 @@ const Store = new $B.ast.Store(),

const EXTRA = {}
const n_keyword_lists = 9;
const reserved_keywords = {
NULL: -1,
NULL: -1,
const _reserved_keywords = {
if: 642,
as: 640,
in: 651,
Expand Down Expand Up @@ -138,6 +136,10 @@ const reserved_keywords = {
continue: 509,
nonlocal: 524,
};
const reserved_keywords = Object.create(null)
for(var item of Object.entries(_reserved_keywords)){
reserved_keywords[item[0]] = item[1]
}
const soft_keywords = [
"_",
"case",
Expand Down Expand Up @@ -27358,6 +27360,11 @@ $B._PyPegen_parse = function(p){
// skip first token (ENCODING)
p.tok.next()

return file_rule(p)
switch(p.mode){
case 'file':
return file_rule(p)
case 'eval':
return eval_rule(p)
}

}
33 changes: 28 additions & 5 deletions www/src/pegen.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ function PyUnicode_IS_ASCII(char){
}

function PyBytes_FromStringAndSize(s){
return $B.builtins.str.encode(s, 'iso-8859-1')
var dest = new Uint8Array(s.length * 3)
var encoder = new TextEncoder()
var result = encoder.encodeInto(s, dest)
return $B.fast_bytes(Array.from(dest.slice(0, result.written)))
}

function _PyArena_AddPyObject(arena, obj){
Expand Down Expand Up @@ -245,6 +248,11 @@ function initialize_token(p, parser_token, new_token, token_type) {
// assert(parser_token != NULL);

parser_token.num_type = (token_type == NAME) ? _get_keyword_or_name_type(p, new_token) : token_type;
if(parser_token.num_type == -1){
console.log('bizarre', new_token)
console.log('keywords', p.keywords)
alert()
}
parser_token.bytes = PyBytes_FromStringAndSize(new_token.string)

_PyArena_AddPyObject(p.arena, parser_token.bytes)
Expand Down Expand Up @@ -590,7 +598,21 @@ $B._PyPegen.soft_keyword_token = function(p) {
return NULL;
}

function prepared_number_value(prepared){
    // Turn a "prepared" number descriptor into the value used by the parser.
    // The descriptor is dispatched on its `type` field:
    //   'float'     -> `value` is passed to parseFloat
    //   'imaginary' -> `value` is itself a descriptor; its converted value
    //                  becomes the imaginary part of a complex built with
    //                  $B.make_complex (real part 0)
    //   'int'       -> `value` is indexed as [radix, digits] and passed to
    //                  parseInt
    // Any other type falls through and yields undefined.
    var kind = prepared.type
    if(kind === 'float'){
        return parseFloat(prepared.value)
    }
    if(kind === 'imaginary'){
        var imag_part = prepared_number_value(prepared.value)
        return $B.make_complex(0, imag_part)
    }
    if(kind === 'int'){
        var int_spec = prepared.value
        return parseInt(int_spec[1], int_spec[0])
    }
}

function parsenumber_raw(s){
var prepared = $B.prepare_number(s) // in number_parser.js
return prepared_number_value(prepared)
/*
var nd,
x,
dx,
Expand All @@ -599,8 +621,9 @@ function parsenumber_raw(s){
// assert(s != NULL);
errno = 0;
end = s + strlen(s) - 1;
imflag = end == 'j' || end == 'J';
end = strlen(s) - 1;
console.log('end', end, 'last', s[end])
imflag = s[end] == 'j' || s[end] == 'J';
if (s[0] == '0') {
x = PyOS_strtoul(s, end, 0);
if (x < 0 && errno == 0) {
Expand All @@ -615,7 +638,6 @@ function parsenumber_raw(s){
}
return PyLong_FromLong(x);
}
/* XXX Huge floats may silently fail */
if (imflag) {
compl.real = 0.;
compl.imag = PyOS_string_to_double(s, end, NULL);
Expand All @@ -629,6 +651,7 @@ function parsenumber_raw(s){
return NULL;
}
return PyFloat_FromDouble(dx);
*/
}

function parsenumber(s){
Expand Down Expand Up @@ -666,7 +689,7 @@ $B._PyPegen.number_token = function(p){
}

var c = parsenumber(num_raw);

if (c == NULL) {
p.error_indicator = 1;
var tstate = _PyThreadState_GET();
Expand Down
16 changes: 16 additions & 0 deletions www/src/py2js.js
Original file line number Diff line number Diff line change
Expand Up @@ -8886,12 +8886,28 @@ $B.py2js = function(src, module, locals_id, parent_scope){
if($B.parser_to_ast){
console.log('use standard parser')
_ast = new $B.Parser(src, filename, 'file').parse()
}else if($B.py_tokens){
// generated PEG parser
console.log('use generated PEG parser')
var parser = new $B.Parser(src, filename, 'file')
_ast = $B._PyPegen_parse(parser)
console.log('tokens', parser.tokens)
if(_ast === undefined){
console.log('_ast undef', src)
console.log('tokens\n', parser.tokens)
alert()
parser = new $B.Parser(src, filename, 'file')
parser.call_invalid_rules = true
$B._PyPegen_parse(parser)
console.log('parsed invalid rules')
}
}else{
var root = create_root_node({src, filename},
module, locals_id, parent_scope)
dispatch_tokens(root)
_ast = root.ast()
}
// console.log('_ast', _ast)
$B.parse_time += globalThis.performance.now() - t0
var future = $B.future_features(_ast, filename)
var symtable = $B._PySymtable_Build(_ast, filename, future)
Expand Down
25 changes: 20 additions & 5 deletions www/src/py_builtin_functions.js
Original file line number Diff line number Diff line change
Expand Up @@ -735,13 +735,25 @@ var $$eval = _b_.eval = function(){
}

try{
if($B.parser_to_ast){
if(! _ast){
if(! _ast){
if($B.parser_to_ast){
var _mode = mode == 'eval' ? 'eval' : 'file'
_ast = new $B.Parser(src, filename, _mode).parse()
}
}else{
if(! _ast){
}else if($B.py_tokens){
// generated PEG parser
var _mode = mode == 'eval' ? 'eval' : 'file'
var parser = new $B.Parser(src, filename, _mode)
_ast = $B._PyPegen_parse(parser)
if(_ast === undefined){
console.log('_ast undef', src)
console.log('tokens\n', parser.tokens)
alert()
parser = new $B.Parser(src, filename, 'file')
parser.call_invalid_rules = true
$B._PyPegen_parse(parser)
console.log('parsed invalid rules')
}
}else{
var root = $B.parser.create_root_node(src, '<module>', frame[0], frame[2],
1)
root.mode = mode
Expand Down Expand Up @@ -785,6 +797,9 @@ var $$eval = _b_.eval = function(){
`_b_.print(result)\n` +
`}`
}

console.log('eval js\n', $B.format_indent(js, 0))

try{
var exec_func = new Function('$B', '_b_',
local_name, global_name,
Expand Down
2 changes: 2 additions & 0 deletions www/src/py_bytes.js
Original file line number Diff line number Diff line change
Expand Up @@ -1888,6 +1888,8 @@ function fast_bytes(t){
}
}

$B.fast_bytes = fast_bytes

bytes.$factory = function(){
return bytes.__new__.bind(null, bytes).apply(null, arguments)
}
Expand Down
3 changes: 2 additions & 1 deletion www/src/python_parser_peg_version.js
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ var Parser = $B.Parser = function(src, filename, mode){
}

Parser.prototype.parse = function(){
console.log('parse')
if(this.src.trim().length == 0){
// eg empty __init__.py
return new $B.ast.Module([])
Expand Down Expand Up @@ -415,7 +416,7 @@ Parser.prototype.get_memo = function(rule, position){
var ignored = [$B.py_tokens.ENCODING,
$B.py_tokens.NL,
$B.py_tokens.COMMENT]

Parser.prototype.read_token = function(){
while(true){
var next = this.tokenizer.next()
Expand Down
6 changes: 6 additions & 0 deletions www/src/python_tokenizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,17 @@ function Token(type, string, start, end, line){
res.num_type = $B.py_tokens[type]
if(type == 'OP'){
res.num_type = $B.py_tokens[$B.EXACT_TOKEN_TYPES[string]]
}else if(type == 'NAME' && ['async', 'await'].includes(string)){
res.num_type = $B.py_tokens[string.toUpperCase()]
}
res.lineno = start[0]
res.col_offset = start[1]
res.end_lineno = end[0]
res.end_col_offset = end[1]
if(res.num_type == -1){
console.log('res', res)
alert()
}
}else{
res = {type, string, start, end, line}
res[0] = type
Expand Down
12 changes: 10 additions & 2 deletions www/tests/parse_tests/test_generated_parser.html
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
<!-- scripts for PEG parser -->
<script type="text/javascript" src="/src/action_helpers_generated_version.js"></script>
<script type="text/javascript" src="/src/string_parser.js"></script>
<script type="text/javascript" src="/src/number_parser.js"></script>
<script type="text/javascript" src="/src/python_parser_peg_version.js"></script>
<script type="text/javascript" src="/src/pegen.js"></script>
<script type="text/javascript" src="/src/gen_parse.js"></script>
Expand All @@ -81,9 +82,16 @@ <h2>Test generated PEG parser</h2>
parser = new $B.Parser(src, filename, 'file')
parser.call_invalid_rules = true
$B._PyPegen_parse(parser)
}else{
var imported
var future = $B.future_features(_ast, filename)
var symtable = $B._PySymtable_Build(_ast, filename, future)
var js_obj = $B.js_from_root({ast: _ast,
symtable,
filename,
imported})
console.log('conv to js ok, length', js_obj)
}
console.log(filename, 'parsed in', window.performance.now() - t0, 'ms')
console.log('nb tokens', parser.tokens.length)
}

</script>
Expand Down

0 comments on commit 643b343

Please sign in to comment.