diff --git a/scripts/pegen/javascript_generator.py b/scripts/pegen/javascript_generator.py index 6a430c88d..540385e4f 100644 --- a/scripts/pegen/javascript_generator.py +++ b/scripts/pegen/javascript_generator.py @@ -143,7 +143,12 @@ // skip first token (ENCODING) p.tok.next() - return file_rule(p) + switch(p.mode){ + case 'file': + return file_rule(p) + case 'eval': + return eval_rule(p) + } } """ @@ -577,20 +582,18 @@ def _setup_keywords(self) -> None: ) self.print(f"const n_keyword_lists = {n_keyword_lists};") groups = self._group_keywords_by_length() - self.print("const reserved_keywords = {") + self.print("const _reserved_keywords = {") with self.indent(): num_groups = max(groups) + 1 if groups else 1 for keywords_length in range(num_groups): - if keywords_length not in groups.keys(): - self.print("NULL: -1,") - else: - # self.print("(KeywordToken[]) {") - # with self.indent(): + if keywords_length in groups.keys(): for keyword_str, keyword_type in groups[keywords_length]: self.print(f'{keyword_str}: {keyword_type},') - # self.print("{NULL, -1},") - # self.print("},") self.print("};") + self.print("const reserved_keywords = Object.create(null)") + self.print("for(var item of Object.entries(_reserved_keywords)){") + self.print(" reserved_keywords[item[0]] = item[1]") + self.print("}") def _setup_soft_keywords(self) -> None: soft_keywords = sorted(self.soft_keywords) diff --git a/www/src/action_helpers_generated_version.js b/www/src/action_helpers_generated_version.js index a341de4eb..19ae888aa 100644 --- a/www/src/action_helpers_generated_version.js +++ b/www/src/action_helpers_generated_version.js @@ -218,7 +218,7 @@ $B._PyPegen.formatted_value = function(p, var formatted_value = new $B.ast.FormattedValue(expression, conversion_val, format === undefined ? format : format.result) - set_position_from_obj(formatted_value, p.arena) + set_position_from_obj(formatted_value, arena) if(debug){ var debug_end_line, debug_end_offset, diff --git a/www/src/ast_to_js.js b/www/src/ast_to_js.js index 369f25be3..bd3297777 100644 --- a/www/src/ast_to_js.js +++ b/www/src/ast_to_js.js @@ -1540,7 +1540,11 @@ $B.ast.Constant.prototype.to_js = function(){ }else if(this.value.__class__ === _b_.bytes){ return `_b_.bytes.$factory([${this.value.source}])` }else if(typeof this.value == "number"){ - return this.value + if(Number.isInteger(this.value)){ + return this.value + }else{ + return `({__class__: _b_.float, value: ${this.value}})` + } }else if(this.value.__class__ === $B.long_int){ return `$B.fast_long_int(${this.value.value}n)` }else if(this.value.__class__ === _b_.float){ diff --git a/www/src/gen_parse.js b/www/src/gen_parse.js index ed6a34618..969f83019 100644 --- a/www/src/gen_parse.js +++ b/www/src/gen_parse.js @@ -101,9 +101,7 @@ const Store = new $B.ast.Store(), const EXTRA = {} const n_keyword_lists = 9; -const reserved_keywords = { - NULL: -1, - NULL: -1, +const _reserved_keywords = { if: 642, as: 640, in: 651, @@ -138,6 +136,10 @@ const reserved_keywords = { continue: 509, nonlocal: 524, }; +const reserved_keywords = Object.create(null) +for(var item of Object.entries(_reserved_keywords)){ + reserved_keywords[item[0]] = item[1] +} const soft_keywords = [ "_", "case", @@ -27358,6 +27360,11 @@ $B._PyPegen_parse = function(p){ // skip first token (ENCODING) p.tok.next() - return file_rule(p) + switch(p.mode){ + case 'file': + return file_rule(p) + case 'eval': + return eval_rule(p) + } } diff --git a/www/src/pegen.js b/www/src/pegen.js index 06759c725..2ced61c0f 100644 --- a/www/src/pegen.js +++ b/www/src/pegen.js @@ -57,7 +57,10 @@ function PyUnicode_IS_ASCII(char){ } function PyBytes_FromStringAndSize(s){ - return $B.builtins.str.encode(s, 'iso-8859-1') + var dest = new Uint8Array(s.length * 3) + var encoder = new TextEncoder() + var result = encoder.encodeInto(s, dest) + return $B.fast_bytes(Array.from(dest.slice(0, result.written))) } function _PyArena_AddPyObject(arena, obj){ @@ -245,6 +248,11 @@ function initialize_token(p, parser_token, new_token, token_type) { // assert(parser_token != NULL); parser_token.num_type = (token_type == NAME) ? _get_keyword_or_name_type(p, new_token) : token_type; + if(parser_token.num_type == -1){ + console.log('bizarre', new_token) + console.log('keywords', p.keywords) + alert() + } parser_token.bytes = PyBytes_FromStringAndSize(new_token.string) _PyArena_AddPyObject(p.arena, parser_token.bytes) @@ -590,7 +598,21 @@ $B._PyPegen.soft_keyword_token = function(p) { return NULL; } +function prepared_number_value(prepared){ + switch(prepared.type){ + case 'float': + return parseFloat(prepared.value) + case 'imaginary': + return $B.make_complex(0, prepared_number_value(prepared.value)) + case 'int': + return parseInt(prepared.value[1], prepared.value[0]) + } +} + function parsenumber_raw(s){ + var prepared = $B.prepare_number(s) // in number_parser.js + return prepared_number_value(prepared) + /* var nd, x, dx, @@ -599,8 +621,9 @@ function parsenumber_raw(s){ // assert(s != NULL); errno = 0; - end = s + strlen(s) - 1; - imflag = end == 'j' || end == 'J'; + end = strlen(s) - 1; + console.log('end', end, 'last', s[end]) + imflag = s[end] == 'j' || s[end] == 'J'; if (s[0] == '0') { x = PyOS_strtoul(s, end, 0); if (x < 0 && errno == 0) { @@ -615,7 +638,6 @@ function parsenumber_raw(s){ } return PyLong_FromLong(x); } - /* XXX Huge floats may silently fail */ if (imflag) { compl.real = 0.; compl.imag = PyOS_string_to_double(s, end, NULL); @@ -629,6 +651,7 @@ function parsenumber_raw(s){ return NULL; } return PyFloat_FromDouble(dx); + */ } function parsenumber(s){ @@ -666,7 +689,7 @@ $B._PyPegen.number_token = function(p){ } var c = parsenumber(num_raw); - + if (c == NULL) { p.error_indicator = 1; var tstate = _PyThreadState_GET(); diff --git a/www/src/py2js.js b/www/src/py2js.js index 6bed78064..7140d0d12 100644 --- a/www/src/py2js.js +++ b/www/src/py2js.js @@ -8886,12 +8886,28 @@ $B.py2js = function(src, module, locals_id, parent_scope){ if($B.parser_to_ast){ console.log('use standard parser') _ast = new $B.Parser(src, filename, 'file').parse() + }else if($B.py_tokens){ + // generated PEG parser + console.log('use generated PEG parser') + var parser = new $B.Parser(src, filename, 'file') + _ast = $B._PyPegen_parse(parser) + console.log('tokens', parser.tokens) + if(_ast === undefined){ + console.log('_ast undef', src) + console.log('tokens\n', parser.tokens) + alert() + parser = new $B.Parser(src, filename, 'file') + parser.call_invalid_rules = true + $B._PyPegen_parse(parser) + console.log('parsed invalid rules') + } }else{ var root = create_root_node({src, filename}, module, locals_id, parent_scope) dispatch_tokens(root) _ast = root.ast() } + // console.log('_ast', _ast) $B.parse_time += globalThis.performance.now() - t0 var future = $B.future_features(_ast, filename) var symtable = $B._PySymtable_Build(_ast, filename, future) diff --git a/www/src/py_builtin_functions.js b/www/src/py_builtin_functions.js index 329e708ca..be44a83fa 100644 --- a/www/src/py_builtin_functions.js +++ b/www/src/py_builtin_functions.js @@ -735,13 +735,25 @@ var $$eval = _b_.eval = function(){ } try{ - if($B.parser_to_ast){ - if(! _ast){ + if(! _ast){ + if($B.parser_to_ast){ var _mode = mode == 'eval' ? 'eval' : 'file' _ast = new $B.Parser(src, filename, _mode).parse() - } - }else{ - if(! _ast){ + }else if($B.py_tokens){ + // generated PEG parser + var _mode = mode == 'eval' ? 'eval' : 'file' + var parser = new $B.Parser(src, filename, _mode) + _ast = $B._PyPegen_parse(parser) + if(_ast === undefined){ + console.log('_ast undef', src) + console.log('tokens\n', parser.tokens) + alert() + parser = new $B.Parser(src, filename, 'file') + parser.call_invalid_rules = true + $B._PyPegen_parse(parser) + console.log('parsed invalid rules') + } + }else{ var root = $B.parser.create_root_node(src, '', frame[0], frame[2], 1) root.mode = mode @@ -785,6 +797,9 @@ var $$eval = _b_.eval = function(){ `_b_.print(result)\n` + `}` } + + console.log('eval js\n', $B.format_indent(js, 0)) + try{ var exec_func = new Function('$B', '_b_', local_name, global_name, diff --git a/www/src/py_bytes.js b/www/src/py_bytes.js index 8e5cedac2..fd2cf6a51 100644 --- a/www/src/py_bytes.js +++ b/www/src/py_bytes.js @@ -1888,6 +1888,8 @@ function fast_bytes(t){ } } +$B.fast_bytes = fast_bytes + bytes.$factory = function(){ return bytes.__new__.bind(null, bytes).apply(null, arguments) } diff --git a/www/src/python_parser_peg_version.js b/www/src/python_parser_peg_version.js index 025afceee..814433514 100644 --- a/www/src/python_parser_peg_version.js +++ b/www/src/python_parser_peg_version.js @@ -347,6 +347,7 @@ var Parser = $B.Parser = function(src, filename, mode){ } Parser.prototype.parse = function(){ + console.log('parse') if(this.src.trim().length == 0){ // eg empty __init__.py return new $B.ast.Module([]) @@ -415,7 +416,7 @@ Parser.prototype.get_memo = function(rule, position){ var ignored = [$B.py_tokens.ENCODING, $B.py_tokens.NL, $B.py_tokens.COMMENT] - + Parser.prototype.read_token = function(){ while(true){ var next = this.tokenizer.next() diff --git a/www/src/python_tokenizer.js b/www/src/python_tokenizer.js index a9cef0267..b6c524b39 100644 --- a/www/src/python_tokenizer.js +++ b/www/src/python_tokenizer.js @@ -153,11 +153,17 @@ function Token(type, string, start, end, line){ res.num_type = $B.py_tokens[type] if(type == 'OP'){ res.num_type = $B.py_tokens[$B.EXACT_TOKEN_TYPES[string]] + }else if(type == 'NAME' && ['async', 'await'].includes(string)){ + res.num_type = $B.py_tokens[string.toUpperCase()] } res.lineno = start[0] res.col_offset = start[1] res.end_lineno = end[0] res.end_col_offset = end[1] + if(res.num_type == -1){ + console.log('res', res) + alert() + } }else{ res = {type, string, start, end, line} res[0] = type diff --git a/www/tests/parse_tests/test_generated_parser.html b/www/tests/parse_tests/test_generated_parser.html index 312402fce..d1c92ecc9 100644 --- a/www/tests/parse_tests/test_generated_parser.html +++ b/www/tests/parse_tests/test_generated_parser.html @@ -55,6 +55,7 @@ + @@ -81,9 +82,16 @@

Test generated PEG parser

parser = new $B.Parser(src, filename, 'file') parser.call_invalid_rules = true $B._PyPegen_parse(parser) + }else{ + var imported + var future = $B.future_features(_ast, filename) + var symtable = $B._PySymtable_Build(_ast, filename, future) + var js_obj = $B.js_from_root({ast: _ast, + symtable, + filename, + imported}) + console.log('conv to js ok, length', js_obj) } - console.log(filename, 'parsed in', window.performance.now() - t0, 'ms') - console.log('nb tokens', parser.tokens.length) }