Skip to content

Commit

Permalink
Add exclude patterns to AST rewrites (#36)
Browse files Browse the repository at this point in the history
  • Loading branch information
rth authored Oct 25, 2023
1 parent 85630db commit a12b4af
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 9 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add support for packages installed via micropip in `pyodide pack`
[#31](https://github.com/pyodide/pyodide-pack/pull/31)

## Fixed

- Fix a syntax error when stripping docstrings from a function with an empty body
[#35](https://github.com/pyodide/pyodide-pack/pull/35)


### Changed

Expand Down
67 changes: 59 additions & 8 deletions pyodide_pack/ast_rewrite.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import ast
import fnmatch
import shutil
import sys
import zipfile
Expand All @@ -7,6 +8,11 @@

import typer

STRIP_DOCSTRING_EXCLUDES: list[str] = []
STRIP_DOCSTRING_MODULE_EXCLUDES: list[str] = [
"numpy/*" # known issue for v1.25 to double check for v1.26
]


class _StripDocstringsTransformer(ast.NodeTransformer):
"""Strip docstring in an AST tree.
Expand All @@ -18,6 +24,10 @@ def visit_FunctionDef(self, node):
"""Remove the docstring from the function definition"""
if ast.get_docstring(node, clean=False) is not None:
del node.body[0]
if not len(node.body):
# Nothing left in the body, add a pass statement
node.body.append(ast.Pass())

# Continue processing the function's body
self.generic_visit(node)
return node
Expand All @@ -26,6 +36,14 @@ def visit_FunctionDef(self, node):
visit_ClassDef = visit_FunctionDef


def _path_matches_patterns(path: str, patterns: list[str]) -> bool:
"""Check if a path matches any of the patterns."""
for pattern in patterns:
if fnmatch.fnmatch(path, pattern):
return True
return False


def _strip_module_docstring(tree: ast.Module) -> ast.Module:
"""Remove docstring from module.
Expand All @@ -41,9 +59,42 @@ def _strip_module_docstring(tree: ast.Module) -> ast.Module:
return tree


def _rewrite_py_code(
code: str,
file_name: str,
strip_docstrings: bool = False,
strip_module_docstrings: bool = False,
) -> str:
try:
tree = ast.parse(code)
except SyntaxError:
return code
try:
if strip_docstrings and not _path_matches_patterns(
file_name, STRIP_DOCSTRING_EXCLUDES
):
tree = _strip_module_docstring(tree)
if strip_module_docstrings and not _path_matches_patterns(
file_name, STRIP_DOCSTRING_MODULE_EXCLUDES
):
tree = _StripDocstringsTransformer().visit(tree)
uncommented_code = ast.unparse(tree)
except RecursionError:
# Some files (e.g. modules in sympy) produce a recursion error when running
# the node transformer on them
print(f"Skipping AST rewrite for {file_name} due to RecursionError")
uncommented_code = code

return uncommented_code


def main(
input_dir: Path = typer.Argument(..., help="Path to the folder to compress"),
strip_docstrings: bool = typer.Option(False, help="Strip docstrings"),
strip_module_docstrings: bool = typer.Option(
False, help="Strip module lebel docstrings"
),
# py_compile: bool = typer.Option(False, help="py-compile files")
) -> None:
"""Minify a folder of Python files.
Expand All @@ -65,16 +116,16 @@ def main(
code = file.read_text()
except UnicodeDecodeError:
continue

try:
tree = ast.parse(code)
except SyntaxError:
uncommented_code = _rewrite_py_code(
code,
file_name=str(file),
strip_docstrings=strip_docstrings,
strip_module_docstrings=strip_module_docstrings,
)

if uncommented_code is None:
continue
if strip_docstrings:
tree = _strip_module_docstring(tree)
tree = _StripDocstringsTransformer().visit(tree)

uncommented_code = ast.unparse(tree)
file.write_text(uncommented_code)
n_processed += 1

Expand Down
20 changes: 19 additions & 1 deletion pyodide_pack/tests/test_ast_rewrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,24 @@ def bar(self):
)


def test_strip_docstrings_empty_function():
src_code = '''
def foo():
"""This is a docstring"""
'''
tree = ast.parse(dedent(src_code))
tree = _StripDocstringsTransformer().visit(tree)
assert (
ast.unparse(tree)
== dedent(
"""
def foo():
pass
"""
).strip("\n")
)


def test_strip_module_docstrings():
src_code = '''
"""This is a docstring"""
Expand Down Expand Up @@ -126,7 +144,7 @@ def test_cli_minify(tmp_path):
input_dir.mkdir()
(input_dir / "pathlib.py").write_text(Path(pathlib.__file__).read_text())

main(input_dir, strip_docstrings=False)
main(input_dir, strip_docstrings=False, strip_module_docstrings=False)
output_path = tmp_path / "input_dir_stripped.zip"
assert output_path.exists()
# There is at least a 10% size reduction, though this test and API needs to be rewritten
Expand Down

0 comments on commit a12b4af

Please sign in to comment.