From a12b4af5e8a301eb34bc3f3f9b0466cb28694c71 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 25 Oct 2023 16:35:46 +0200 Subject: [PATCH] Add exclude patterns to AST rewrites (#36) --- CHANGELOG.md | 5 ++ pyodide_pack/ast_rewrite.py | 67 +++++++++++++++++++++++--- pyodide_pack/tests/test_ast_rewrite.py | 20 +++++++- 3 files changed, 83 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bf8e28..e611f09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add support for packages installed via micropip in `pyodide pack` [#31](https://github.com/pyodide/pyodide-pack/pull/31) +## Fixed + + - Fix a syntax error when stripping docstrings from a function with an empty body + [#35](https://github.com/pyodide/pyodide-pack/pull/35) + ### Changed diff --git a/pyodide_pack/ast_rewrite.py b/pyodide_pack/ast_rewrite.py index 18cb27b..98e0832 100644 --- a/pyodide_pack/ast_rewrite.py +++ b/pyodide_pack/ast_rewrite.py @@ -1,4 +1,5 @@ import ast +import fnmatch import shutil import sys import zipfile @@ -7,6 +8,11 @@ import typer +STRIP_DOCSTRING_EXCLUDES: list[str] = [] +STRIP_DOCSTRING_MODULE_EXCLUDES: list[str] = [ + "numpy/*" # known issue for v1.25 to double check for v1.26 +] + class _StripDocstringsTransformer(ast.NodeTransformer): """Strip docstring in an AST tree. @@ -18,6 +24,10 @@ def visit_FunctionDef(self, node): """Remove the docstring from the function definition""" if ast.get_docstring(node, clean=False) is not None: del node.body[0] + if not len(node.body): + # Nothing left in the body, add a pass statement + node.body.append(ast.Pass()) + # Continue processing the function's body self.generic_visit(node) return node @@ -26,6 +36,14 @@ def visit_FunctionDef(self, node): visit_ClassDef = visit_FunctionDef +def _path_matches_patterns(path: str, patterns: list[str]) -> bool: + """Check if a path matches any of the patterns.""" + for pattern in patterns: + if fnmatch.fnmatch(path, pattern): + return True + return False + + def _strip_module_docstring(tree: ast.Module) -> ast.Module: """Remove docstring from module. @@ -41,9 +59,42 @@ def _strip_module_docstring(tree: ast.Module) -> ast.Module: return tree +def _rewrite_py_code( + code: str, + file_name: str, + strip_docstrings: bool = False, + strip_module_docstrings: bool = False, +) -> str: + try: + tree = ast.parse(code) + except SyntaxError: + return code + try: + if strip_docstrings and not _path_matches_patterns( + file_name, STRIP_DOCSTRING_EXCLUDES + ): + tree = _strip_module_docstring(tree) + if strip_module_docstrings and not _path_matches_patterns( + file_name, STRIP_DOCSTRING_MODULE_EXCLUDES + ): + tree = _StripDocstringsTransformer().visit(tree) + uncommented_code = ast.unparse(tree) + except RecursionError: + # Some files (e.g. modules in sympy) produce a recursion error when running + # the node transformer on them + print(f"Skipping AST rewrite for {file_name} due to RecursionError") + uncommented_code = code + + return uncommented_code + + def main( input_dir: Path = typer.Argument(..., help="Path to the folder to compress"), strip_docstrings: bool = typer.Option(False, help="Strip docstrings"), + strip_module_docstrings: bool = typer.Option( + False, help="Strip module lebel docstrings" + ), + # py_compile: bool = typer.Option(False, help="py-compile files") ) -> None: """Minify a folder of Python files. @@ -65,16 +116,16 @@ def main( code = file.read_text() except UnicodeDecodeError: continue - - try: - tree = ast.parse(code) - except SyntaxError: + uncommented_code = _rewrite_py_code( + code, + file_name=str(file), + strip_docstrings=strip_docstrings, + strip_module_docstrings=strip_module_docstrings, + ) + + if uncommented_code is None: continue - if strip_docstrings: - tree = _strip_module_docstring(tree) - tree = _StripDocstringsTransformer().visit(tree) - uncommented_code = ast.unparse(tree) file.write_text(uncommented_code) n_processed += 1 diff --git a/pyodide_pack/tests/test_ast_rewrite.py b/pyodide_pack/tests/test_ast_rewrite.py index 283f8b6..a036be8 100644 --- a/pyodide_pack/tests/test_ast_rewrite.py +++ b/pyodide_pack/tests/test_ast_rewrite.py @@ -88,6 +88,24 @@ def bar(self): ) +def test_strip_docstrings_empty_function(): + src_code = ''' + def foo(): + """This is a docstring""" + ''' + tree = ast.parse(dedent(src_code)) + tree = _StripDocstringsTransformer().visit(tree) + assert ( + ast.unparse(tree) + == dedent( + """ + def foo(): + pass + """ + ).strip("\n") + ) + + def test_strip_module_docstrings(): src_code = ''' """This is a docstring""" @@ -126,7 +144,7 @@ def test_cli_minify(tmp_path): input_dir.mkdir() (input_dir / "pathlib.py").write_text(Path(pathlib.__file__).read_text()) - main(input_dir, strip_docstrings=False) + main(input_dir, strip_docstrings=False, strip_module_docstrings=False) output_path = tmp_path / "input_dir_stripped.zip" assert output_path.exists() # There is at least a 10% size reduction, though this test and API needs to be rewritten