From 72addafb7cd86bfe4642bbf53d76b71f885f9bfd Mon Sep 17 00:00:00 2001
From: Andreas Motl
Date: Sat, 2 Nov 2024 14:47:03 +0100
Subject: [PATCH] SFA: Add subsystem for loading single-file applications

---
 CHANGES.md                   |   1 +
 pueblo/sfa/README.md         |  31 +++++
 pueblo/sfa/__init__.py       |   0
 pueblo/sfa/cli.py            |  56 +++++++++
 pueblo/sfa/core.py           | 230 +++++++++++++++++++++++++++++++++++
 pueblo/util/program.py       |  17 ++-
 pyproject.toml               |   6 +-
 tests/test_sfa.py            |  94 ++++++++++++++
 tests/testdata/__init__.py   |   0
 tests/testdata/entrypoint.py |   3 +
 10 files changed, 428 insertions(+), 10 deletions(-)
 create mode 100644 pueblo/sfa/README.md
 create mode 100644 pueblo/sfa/__init__.py
 create mode 100644 pueblo/sfa/cli.py
 create mode 100644 pueblo/sfa/core.py
 create mode 100644 tests/test_sfa.py
 create mode 100644 tests/testdata/__init__.py
 create mode 100644 tests/testdata/entrypoint.py

diff --git a/CHANGES.md b/CHANGES.md
index 79e7fe3..ace00d7 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -3,6 +3,7 @@
 ## Unreleased
 - nlp: Updated dependencies langchain, langchain-text-splitters, unstructured
 - CI: Verify compatibility with Python 3.13
+- SFA: Added a subsystem for loading single-file applications
 
 ## 2024-03-07 v0.0.9
 - Testing: Add `pueblo.testing.notebook.{list_path,generate_tests}`
diff --git a/pueblo/sfa/README.md b/pueblo/sfa/README.md
new file mode 100644
index 0000000..4219558
--- /dev/null
+++ b/pueblo/sfa/README.md
@@ -0,0 +1,31 @@
+# Single File Applications (sfa)
+
+
+## About
+
+Single File Applications, a few [DWIM] conventions and tools to
+install and invoke Python applications defined within single files.
+
+
+## Preamble
+
+How can an arbitrary Python entrypoint be invoked interactively?
+```shell
+python -m tests.testdata.folder.dummy -c "main()"
+python -c "from tests.testdata.folder.dummy import main; main()"
+```
+Remark: The first command looks good, but does not work, because
+each of the options `-m` and `-c` terminates the option list, so they
+cannot be used together.
+
+
+## Synopsis
+
+```shell
+# Invoke Python entrypoint with given specification.
+PYTHONPATH=$(pwd) sfa run tests.testdata.entrypoint:main
+sfa run tests/testdata/entrypoint.py:main
+```
+
+
+[DWIM]: https://en.wikipedia.org/wiki/DWIM
diff --git a/pueblo/sfa/__init__.py b/pueblo/sfa/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pueblo/sfa/cli.py b/pueblo/sfa/cli.py
new file mode 100644
index 0000000..7293261
--- /dev/null
+++ b/pueblo/sfa/cli.py
@@ -0,0 +1,56 @@
+import logging
+import sys
+
+from pueblo.sfa.core import run
+from pueblo.util.program import MiniRunner
+
+logger = logging.getLogger()
+
+
+class SFARunner(MiniRunner):
+    """
+    Command-line runner for the `sfa` program.
+    """
+
+    def configure(self):
+        """
+        Register the `run` subcommand and its positional `target` argument.
+        """
+        subparsers = self.parser.add_subparsers(dest="command")
+
+        subcommand_run = subparsers.add_parser("run", help="Invoke application")
+        subcommand_run.add_argument("target")
+
+    def run(self):
+        """
+        Invoke the application addressed by the `target` argument.
+
+        Exits with status 1 when no target was given, or when loading
+        the target raises `NotImplementedError`.
+        """
+        # `target` is only defined on the `run` subparser, so the attribute
+        # is missing from the namespace when no subcommand was given.
+        target = getattr(self.args, "target", None)
+        if not target:
+            logger.error("Unable to invoke target: Not given or empty")
+            self.parser.print_help()
+            sys.exit(1)
+
+        try:
+            run(target, vars(self.args))
+        except NotImplementedError as ex:
+            logger.critical(ex)
+            sys.exit(1)
+
+
+def main(args=None, prog_name=None):
+    """
+    Main program.
+
+    - Setup logging.
+    - Read command-line parameters.
+    - Run sanity checks.
+    - Invoke runner.
+    """
+    runner = SFARunner(name=prog_name, args_input=args)
+    return runner.run()
diff --git a/pueblo/sfa/core.py b/pueblo/sfa/core.py
new file mode 100644
index 0000000..4b2c628
--- /dev/null
+++ b/pueblo/sfa/core.py
@@ -0,0 +1,230 @@
+import importlib.util
+import sys
+import typing as t
+from pathlib import Path
+from types import ModuleType
+from urllib.parse import urlparse
+
+from attrs import define
+
+
+class InvalidTarget(Exception):
+    """
+    Raised by `SingleFileApplication.load` when the address target is malformed.
+    """
+
+
+@define
+class ApplicationAddress:
+    """
+    Address of an application entrypoint: A target location, and a property name.
+    """
+
+    target: str
+    property: str
+    is_url: bool = False
+
+    @classmethod
+    def from_spec(cls, spec: str, default_property=None):
+        """
+        Parse entrypoint specification to application address instance.
+
+        https://packaging.python.org/en/latest/specifications/entry-points/
+
+        :param spec: Entrypoint address (e.g. module 'acme.app:main', file path '/path/to/acme/app.py:main')
+        :param default_property: Name of the property to load if not specified in target (default: None)
+        :return: Application address instance
+        :raises ValueError: If the specification includes no property, and no default property was supplied
+        """
+        if cls.is_valid_url(spec):
+            # Decode launch target location address from URL.
+            # URL: https://example.org/acme/app.py#foo
+            url = urlparse(spec)
+            frag = url.fragment
+            target = url.geturl().replace(f"#{frag}", "")
+            prop = frag
+            is_url = True
+
+        else:
+            # Decode launch target location address from Python module or path.
+            # Module: acme.app:foo
+            # Path: /path/to/acme/app.py:foo
+            target_fragments = spec.split(":")
+            target = target_fragments[0]
+            prop = target_fragments[1] if len(target_fragments) > 1 else ""
+            is_url = False
+
+        # Fall back to the default property for all kinds of addresses.
+        if not prop:
+            if default_property is None:
+                raise ValueError("Property can not be discovered, and no default property was supplied")
+            prop = default_property
+
+        return cls(target=target, property=prop, is_url=is_url)
+
+    @staticmethod
+    def is_valid_url(url) -> bool:
+        """
+        Whether the given string parses as a URL including both scheme and network location.
+        """
+        try:
+            result = urlparse(url)
+            return all([result.scheme, result.netloc])
+        except ValueError:
+            return False
+
+    def install(self):
+        """
+        Placeholder, currently without function.
+        """
+
+
+@define
+class SingleFileApplication:
+    """
+    Load Python code from any source, addressed by file path or module name.
+
+    https://packaging.python.org/en/latest/specifications/entry-points/
+
+    Warning:
+        This component executes arbitrary Python code. Ensure the target is from a trusted
+        source to prevent security vulnerabilities.
+
+    Args:
+        address: Application entrypoint address
+
+    Example:
+        >>> app = SingleFileApplication.from_spec("myapp.api:server")
+        >>> app.load()
+        >>> app.run()
+    """  # noqa: E501
+
+    address: ApplicationAddress
+    _module: t.Optional[ModuleType] = None
+    _entrypoint: t.Optional[t.Callable] = None
+
+    @classmethod
+    def from_spec(cls, spec: str, default_property=None):
+        """
+        Create application instance from entrypoint specification, see `ApplicationAddress.from_spec`.
+        """
+        address = ApplicationAddress.from_spec(spec=spec, default_property=default_property)
+        return cls(address=address)
+
+    def run(self, *args, **kwargs):
+        """
+        Invoke the entrypoint resolved by `load`, forwarding all arguments, and return its outcome.
+        """
+        return t.cast(t.Callable, self._entrypoint)(*args, **kwargs)
+
+    def load(self):
+        """
+        Import the launch target, and resolve its entrypoint property.
+
+        :raises InvalidTarget: If the target is empty or malformed
+        :raises ValueError: If the property name is not a valid Python identifier
+        :raises NotImplementedError: If the target is a URL
+        :raises AttributeError: If the module does not provide the property
+        :raises TypeError: If the property is not callable
+        """
+        target = self.address.target
+        prop = self.address.property
+
+        # Sanity checks, as suggested by @coderabbitai. Thanks.
+        if not target or (":" in target and len(target.split(":")) != 2):
+            raise InvalidTarget(
+                f"Invalid target format: {target}. "
+                "Use either a Python module entrypoint specification, "
+                "a filesystem path, or a remote URL."
+            )
+
+        # Validate property name follows Python identifier rules.
+        if not prop.isidentifier():
+            raise ValueError(f"Invalid property name: {prop}")
+
+        # Import launch target. Treat input location either as a filesystem path
+        # (/path/to/acme/app.py), or as a module address specification (acme.app).
+        self.load_any()
+
+        # Resolve entrypoint on the imported module. The launch target is not invoked here.
+        msg_prefix = f"Failed to import: {target}"
+        try:
+            entrypoint = getattr(self._module, prop, None)
+            if entrypoint is None:
+                raise AttributeError(f"Module has no instance attribute '{prop}'")
+            if not callable(entrypoint):
+                raise TypeError(f"Entrypoint is not callable: {entrypoint}")
+            self._entrypoint = entrypoint
+        except AttributeError as ex:
+            raise AttributeError(f"{msg_prefix}: {ex}") from ex
+        except ImportError as ex:
+            raise ImportError(f"{msg_prefix}: {ex}") from ex
+        except TypeError as ex:
+            raise TypeError(f"{msg_prefix}: {ex}") from ex
+        except Exception as ex:
+            raise RuntimeError(f"{msg_prefix}: Unexpected error: {ex}") from ex
+
+    def load_any(self):
+        """
+        Load the target module, either from a file when the target is an existing file, or by module name.
+
+        :raises NotImplementedError: If the target is a URL
+        """
+        if self.address.is_url:
+            # Fail early, instead of leaving the module empty and failing at entrypoint resolution.
+            raise NotImplementedError(f"Loading applications from URLs is not implemented: {self.address.target}")
+        path = Path(self.address.target)
+        if path.is_file():
+            mod = self.load_file(path)
+        else:
+            mod = importlib.import_module(self.address.target)
+        self._module = mod
+
+    @staticmethod
+    def load_file(path: Path) -> ModuleType:
+        """
+        Load a Python file as a module using importlib.
+
+        Args:
+            path: Path to the Python file to load
+
+        Returns:
+            The loaded module object
+
+        Raises:
+            ValueError: If the file does not have a `.py` extension
+            ImportError: If the module cannot be loaded
+        """
+
+        # Validate file extension
+        if path.suffix != ".py":
+            raise ValueError(f"File must have .py extension: {path}")
+
+        # Use absolute path hash for uniqueness of name.
+        unique_id = hash(str(path.absolute()))
+        name = f"__{path.stem}_{unique_id}__"
+
+        spec = importlib.util.spec_from_file_location(name, path)
+        if spec is None or spec.loader is None:
+            raise ImportError(f"Failed loading module from file: {path}")
+        app = importlib.util.module_from_spec(spec)
+        sys.modules[name] = app
+        try:
+            spec.loader.exec_module(app)
+            return app
+        except Exception as ex:
+            sys.modules.pop(name, None)
+            raise ImportError(f"Failed to execute module '{app}': {ex}") from ex
+
+
+def run(spec: str, options: t.Dict[str, str]):
+    """
+    Load the application addressed by `spec`, and invoke its entrypoint without arguments.
+
+    :param spec: Entrypoint specification, see `ApplicationAddress.from_spec`
+    :param options: Currently unused
+    :return: Outcome of the entrypoint
+    """
+    app = SingleFileApplication.from_spec(spec=spec)
+    app.load()
+    return app.run()
diff --git a/pueblo/util/program.py b/pueblo/util/program.py
index 7faafa2..f31194e 100644
--- a/pueblo/util/program.py
+++ b/pueblo/util/program.py
@@ -4,23 +4,22 @@ import typing as t
 
 from argparse import ArgumentDefaultsHelpFormatter
 
-from attrs import define
-
 from pueblo import __version__, setup_logging
 
 logger = logging.getLogger()
 
 
-@define
 class MiniRunner:
-    name: t.Any
-    args_input: t.Any
 
-    _parser: t.Optional[argparse.ArgumentParser] = None
-    _parsed_args: t.Optional[argparse.Namespace] = None
-    _runner: t.Optional[t.Callable] = None
+    def __init__(self, name: t.Any, args_input: t.Any):
+
+        self.name = name
+        self.args_input = args_input
+
+        self._parser: t.Optional[argparse.ArgumentParser] = None
+        self._parsed_args: t.Optional[argparse.Namespace] = None
+        self._runner: t.Optional[t.Callable] = None
 
-    def __attrs_post_init__(self):
         self.setup()
 
     @property
diff --git a/pyproject.toml b/pyproject.toml
index cd49ac8..7f51531 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,7 +66,7 @@ dependencies = [
 ]
 
 optional-dependencies.all = [
-  "pueblo[cli,dataframe,fileio,nlp,notebook,proc,testing,web]",
+  "pueblo[cli,dataframe,fileio,nlp,notebook,proc,sfa,testing,web]",
 ]
 optional-dependencies.cli = [
   "click<9",
@@ -114,6 +114,9 @@ optional-dependencies.release = [
   "build<2",
   "twine<6",
 ]
+optional-dependencies.sfa = [
+  "attrs",
+]
 optional-dependencies.test = [
   "pueblo[testing]",
 ]
@@ -133,6 +136,7 @@ urls.homepage = "https://github.com/pyveci/pueblo"
 urls.repository = "https://github.com/pyveci/pueblo"
 scripts.ngr = "pueblo.ngr.cli:main"
 scripts.pueblo = "pueblo.cli:cli"
+scripts.sfa = "pueblo.sfa.cli:main"
 
 [tool.setuptools]
 # https://setuptools.pypa.io/en/latest/userguide/package_discovery.html
diff --git a/tests/test_sfa.py b/tests/test_sfa.py
new file mode 100644
index 0000000..39bbb7a
--- /dev/null
+++ b/tests/test_sfa.py
@@ -0,0 +1,94 @@
+# ruff: noqa: S603, S607
+import os
+import subprocess
+from pathlib import Path
+
+import pytest
+
+pytest.importorskip("attrs")
+
+
+from pueblo.sfa.core import ApplicationAddress, SingleFileApplication  # noqa: E402
+
+
+def test_address_module():
+    address = ApplicationAddress.from_spec("acme.app:main")
+    assert address.target == "acme.app"
+    assert address.property == "main"
+    assert address.is_url is False
+
+
+def test_address_path():
+    address = ApplicationAddress.from_spec("/path/to/acme/app.py:main")
+    assert address.target == "/path/to/acme/app.py"
+    assert address.property == "main"
+    assert address.is_url is False
+
+
+def test_address_url():
+    address = ApplicationAddress.from_spec("https://example.org/acme/app.py#main")
+    assert address.target == "https://example.org/acme/app.py"
+    assert address.property == "main"
+    assert address.is_url is True
+
+
+def test_address_url_default_property():
+    address = ApplicationAddress.from_spec("https://example.org/acme/app.py", default_property="main")
+    assert address.target == "https://example.org/acme/app.py"
+    assert address.property == "main"
+    assert address.is_url is True
+
+
+@pytest.mark.parametrize(
+    "spec",
+    [
+        "tests.testdata.entrypoint:main",
+        "tests/testdata/entrypoint.py:main",
+    ],
+)
+def test_application_api_success(capsys, spec):
+    app = SingleFileApplication.from_spec(spec)
+    app.load()
+    outcome = app.run()
+
+    assert outcome == 42
+    assert "Räuber Hotzenplotz" in capsys.readouterr().out
+
+
+@pytest.mark.parametrize(
+    "spec",
+    [
+        "pueblo.context:pueblo_cache_path",
+        "pueblo/context.py:pueblo_cache_path",
+    ],
+)
+def test_application_api_not_callable(capsys, spec):
+    app = SingleFileApplication.from_spec(spec)
+    with pytest.raises(TypeError) as ex:
+        app.load()
+    assert ex.match("Failed to import: .+: Entrypoint is not callable")
+
+
+def test_application_api_url_not_implemented():
+    app = SingleFileApplication.from_spec("https://example.org/acme/app.py#main")
+    with pytest.raises(NotImplementedError) as ex:
+        app.load()
+    assert ex.match("Loading applications from URLs is not implemented")
+
+
+@pytest.mark.parametrize(
+    "spec",
+    [
+        "tests.testdata.entrypoint:main",
+        "tests/testdata/entrypoint.py:main",
+    ],
+)
+def test_application_cli(mocker, capfd, spec):
+    mocker.patch.dict(os.environ, {"PYTHONPATH": str(Path.cwd())})
+    subprocess.check_call(["sfa", "run", spec])
+    assert "Räuber Hotzenplotz" in capfd.readouterr().out
+
+
+def test_application_cli_non_callable(capfd):
+    subprocess.call(["sfa", "run", "pueblo.context:pueblo_cache_path"])
+    assert "TypeError: Failed to import: pueblo.context: Entrypoint is not callable" in capfd.readouterr().err
diff --git a/tests/testdata/__init__.py b/tests/testdata/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/testdata/entrypoint.py b/tests/testdata/entrypoint.py
new file mode 100644
index 0000000..f6705b5
--- /dev/null
+++ b/tests/testdata/entrypoint.py
@@ -0,0 +1,3 @@
+def main():
+    print("Hallo, Räuber Hotzenplotz.")  # noqa: T201
+    return 42