Skip to content

Commit

Permalink
SFA: Add subsystem for loading single-file applications
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Nov 2, 2024
1 parent 53adaa7 commit 1873253
Show file tree
Hide file tree
Showing 9 changed files with 346 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Unreleased
- nlp: Updated dependencies langchain, langchain-text-splitters, unstructured
- CI: Verify compatibility with Python 3.13
- SFA: Added a subsystem for loading single-file applications

## 2024-03-07 v0.0.9
- Testing: Add `pueblo.testing.notebook.{list_path,generate_tests}`
Expand Down
31 changes: 31 additions & 0 deletions pueblo/sfa/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Single File Applications (sfa)


## About

Single File Applications (SFA) is a set of [DWIM] conventions and tools for
installing and invoking Python applications defined within single files.


## Preamble

Motivation: How do you invoke an arbitrary Python entrypoint interactively?
```shell
python -m tests.testdata.folder.dummy -c "main()"
python -c "from tests.testdata.folder.dummy import main; main()"
```
Remark: The first command looks plausible, but it does not work: the `-m`
and `-c` options each terminate the option list, so they cannot be used
together.


## Synopsis

```shell
# Invoke Python entrypoint with given specification.
PYTHONPATH=$(pwd) sfa run tests.testdata.entrypoint:main
sfa run tests/testdata/entrypoint.py:main
```


[DWIM]: https://en.wikipedia.org/wiki/DWIM
Empty file added pueblo/sfa/__init__.py
Empty file.
41 changes: 41 additions & 0 deletions pueblo/sfa/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import logging
import sys

from pueblo.sfa.core import run
from pueblo.util.program import MiniRunner

# `getLogger()` without a name returns the root logger.
logger = logging.getLogger()


class SFARunner(MiniRunner):
    """
    Command-line runner for the `sfa` program.

    Registers a `run` subcommand that takes a single positional `target`
    argument, and hands that target to `pueblo.sfa.core.run`.
    """

    def configure(self):
        """Register the `run` subcommand and its `target` argument on the parser."""
        subparsers = self.parser.add_subparsers(dest="command")

        subcommand_run = subparsers.add_parser("run", help="Invoke application")
        subcommand_run.add_argument("target")

    def run(self):
        """
        Invoke the application addressed by the `target` argument.

        Exits with status 1 when no target is available, or when the core
        raises `NotImplementedError`. Any other exception propagates.
        """
        # `target` is only registered on the `run` subparser, so it is missing
        # from the parsed arguments when no subcommand was given. Look it up
        # defensively instead of failing with an `AttributeError`.
        # NOTE(review): presumably `self.args` is an argparse namespace
        # populated by `MiniRunner` -- confirm against the base class.
        target = getattr(self.args, "target", None)
        if not target:
            logger.error("Unable to invoke target: Not given or empty")
            self.parser.print_help()
            sys.exit(1)

        try:
            # The application's return value is deliberately not returned,
            # so it can not leak into the process exit status via `main()`.
            run(target, vars(self.args))
        except NotImplementedError as ex:
            logger.critical(ex)
            sys.exit(1)


def main(args=None, prog_name=None):
    """
    Entry point of the `sfa` command-line program.

    Builds an `SFARunner` from the given program name and argument list,
    invokes it, and returns whatever its `run()` method returns.
    """
    return SFARunner(name=prog_name, args_input=args).run()
188 changes: 188 additions & 0 deletions pueblo/sfa/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import importlib.util
import sys
import typing as t
from pathlib import Path
from types import ModuleType
from urllib.parse import urlparse

from attrs import define
from upath import UPath


class InvalidTarget(Exception):
    """Raised when an application target is empty or malformed."""


@define
class ApplicationAddress:
    """
    Address of an application entrypoint.

    `target` is a module name, a filesystem path, or a URL. `property` is the
    name of the attribute to load from it. `is_url` tells whether `target`
    was recognized as a URL.
    """

    target: str
    property: str
    is_url: bool = False

    @classmethod
    def from_spec(cls, spec: str, default_property=None):
        """
        Parse entrypoint specification to application address instance.

        :param spec: Entrypoint address, e.g. module 'acme.app:main', file path
                     '/path/to/acme/app.py:main', or URL 'https://example.org/acme/app.py#main'
        :param default_property: Name of the property to load if not specified in `spec`
                                 (default: None, i.e. the property is mandatory)
        :return: Application address instance
        :raises ValueError: If `spec` does not name a property and no default was supplied
        """
        if cls.is_valid_url(spec):
            # Decode launch target location address from URL.
            # URL: https://example.org/acme/app.py#foo
            # The fragment names the property, everything else is the target.
            # Parsing with `urllib.parse` avoids relying on private
            # attributes of path objects.
            url = urlparse(spec)
            target = url._replace(fragment="").geturl()
            prop = url.fragment or default_property
            is_url = True

        else:
            # Decode launch target location address from Python module or path.
            # Module: acme.app:foo
            # Path: /path/to/acme/app.py:foo
            # NOTE: Anything after a second colon is ignored.
            target_fragments = spec.split(":")
            target = target_fragments[0]
            prop = target_fragments[1] if len(target_fragments) > 1 else default_property
            is_url = False

        if prop is None:
            raise ValueError("Property can not be discovered, and no default property was supplied")

        return cls(target=target, property=prop, is_url=is_url)

    @staticmethod
    def is_valid_url(url) -> bool:
        """Return whether `url` parses into both a scheme and a network location."""
        try:
            result = urlparse(url)
        except ValueError:
            return False
        return bool(result.scheme and result.netloc)

    def install(self):
        """Placeholder; currently does nothing."""


@define
class SingleFileApplication:
    """
    Load Python code addressed by file path or module name, and invoke an entrypoint in it.

    Warning:
        This component executes arbitrary Python code. Ensure the target is from a trusted
        source to prevent security vulnerabilities.

    Args:
        address: Application entrypoint address

    Example:
        >>> app = SingleFileApplication.from_spec("myapp.api:server")
        >>> app.load()
        >>> app.run()
    """  # noqa: E501

    address: ApplicationAddress
    # Populated by `load_any()`.
    _module: t.Optional[ModuleType] = None
    # Populated by `load()`.
    _entrypoint: t.Optional[t.Callable] = None

    @classmethod
    def from_spec(cls, spec: str, default_property=None):
        """Create an application from an entrypoint specification string."""
        address = ApplicationAddress.from_spec(spec=spec, default_property=default_property)
        return cls(address=address)

    def run(self, *args, **kwargs):
        """
        Call the loaded entrypoint with the given arguments and return its result.

        `load()` must have succeeded before; until then, no entrypoint is set.
        """
        return t.cast(t.Callable, self._entrypoint)(*args, **kwargs)

    def load(self):
        """
        Import the target and resolve its entrypoint property.

        Raises:
            InvalidTarget: If the target is empty or malformed
            ValueError: If the property name is not a valid Python identifier
            NotImplementedError: If the target is a URL
            AttributeError: If the module does not provide the property
            TypeError: If the property is not callable
            RuntimeError: On any other error while resolving the property

        Errors raised while importing the target itself propagate unchanged.
        """
        target = self.address.target
        prop = self.address.property

        # Sanity checks, as suggested by @coderabbitai. Thanks.
        # A URL legitimately contains colons (scheme separator, port number),
        # so the colon count is only checked for module names and paths.
        if not target or (not self.address.is_url and target.count(":") > 1):
            raise InvalidTarget(
                f"Invalid target format: {target}. "
                "Use either a Python module entrypoint specification, "
                "a filesystem path, or a remote URL."
            )

        # Validate property name follows Python identifier rules.
        if not prop.isidentifier():
            raise ValueError(f"Invalid property name: {prop}")

        # Import launch target. Treat input location either as a filesystem path
        # (/path/to/acme/app.py), or as a module address specification (acme.app).
        self.load_any()

        # Resolve launch target, prefixing any failure with the target name.
        msg_prefix = f"Failed to import: {target}"
        try:
            entrypoint = getattr(self._module, prop, None)
            if entrypoint is None:
                raise AttributeError(f"Module has no instance attribute '{prop}'")
            if not callable(entrypoint):
                raise TypeError(f"Entrypoint is not callable: {entrypoint}")
            self._entrypoint = entrypoint
        except AttributeError as ex:
            raise AttributeError(f"{msg_prefix}: {ex}") from ex
        except ImportError as ex:
            raise ImportError(f"{msg_prefix}: {ex}") from ex
        except TypeError as ex:
            raise TypeError(f"{msg_prefix}: {ex}") from ex
        except Exception as ex:
            raise RuntimeError(f"{msg_prefix}: Unexpected error: {ex}") from ex

    def load_any(self):
        """
        Import the target as a file if it is an existing file, otherwise as a module.

        Raises:
            NotImplementedError: If the target is a URL
        """
        if self.address.is_url:
            # Fail explicitly instead of storing `None` as the module, which
            # would surface later as a misleading "no attribute" error.
            raise NotImplementedError(
                f"Loading applications from URLs is not implemented yet: {self.address.target}"
            )

        path = Path(self.address.target)
        if path.is_file():
            mod = self.load_file(path)
        else:
            mod = importlib.import_module(self.address.target)
        self._module = mod

    @staticmethod
    def load_file(path: Path) -> ModuleType:
        """
        Load a Python file as a module using importlib.

        Args:
            path: Path to the Python file to load

        Returns:
            The loaded module object

        Raises:
            ValueError: If the file does not have a `.py` extension
            ImportError: If the module cannot be loaded or executed
        """

        # Validate file extension
        if path.suffix != ".py":
            raise ValueError(f"File must have .py extension: {path}")

        # Use absolute path hash for uniqueness of name.
        unique_id = hash(str(path.absolute()))
        name = f"__{path.stem}_{unique_id}__"

        spec = importlib.util.spec_from_file_location(name, path)
        if spec is None or spec.loader is None:
            raise ImportError(f"Failed loading module from file: {path}")
        app = importlib.util.module_from_spec(spec)

        # Register the module before executing it, and unregister it again
        # when execution fails, so no half-initialized module stays behind.
        sys.modules[name] = app
        try:
            spec.loader.exec_module(app)
            return app
        except Exception as ex:
            sys.modules.pop(name, None)
            raise ImportError(f"Failed to execute module '{app}': {ex}") from ex


def run(spec: str, options: t.Dict[str, str]):
    """
    Load the application addressed by `spec`, invoke its entrypoint, and return the result.

    :param spec: Entrypoint specification, handed to `SingleFileApplication.from_spec`
    :param options: Accepted, but not used by this function
    :return: Return value of the invoked entrypoint
    """
    application = SingleFileApplication.from_spec(spec=spec)
    application.load()
    outcome = application.run()
    return outcome
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ dependencies = [
]

optional-dependencies.all = [
"pueblo[cli,dataframe,fileio,nlp,notebook,proc,testing,web]",
"pueblo[cli,dataframe,fileio,nlp,notebook,proc,sfa,testing,web]",
]
optional-dependencies.cli = [
"click<9",
Expand Down Expand Up @@ -114,6 +114,9 @@ optional-dependencies.release = [
"build<2",
"twine<6",
]
optional-dependencies.sfa = [
"attrs",
]
optional-dependencies.test = [
"pueblo[testing]",
]
Expand All @@ -133,6 +136,7 @@ urls.homepage = "https://github.com/pyveci/pueblo"
urls.repository = "https://github.com/pyveci/pueblo"
scripts.ngr = "pueblo.ngr.cli:main"
scripts.pueblo = "pueblo.cli:cli"
scripts.sfa = "pueblo.sfa.cli:main"

[tool.setuptools]
# https://setuptools.pypa.io/en/latest/userguide/package_discovery.html
Expand Down
77 changes: 77 additions & 0 deletions tests/test_sfa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# ruff: noqa: S603, S607
import os
import subprocess
from pathlib import Path

import pytest

from pueblo.sfa.core import ApplicationAddress, SingleFileApplication


def test_address_module():
    """A module specification decodes into module name and property."""
    parsed = ApplicationAddress.from_spec("acme.app:main")
    assert (parsed.target, parsed.property) == ("acme.app", "main")
    assert parsed.is_url is False


def test_address_path():
    """A file path specification decodes into path and property."""
    parsed = ApplicationAddress.from_spec("/path/to/acme/app.py:main")
    assert (parsed.target, parsed.property) == ("/path/to/acme/app.py", "main")
    assert parsed.is_url is False


def test_address_url():
    """A URL specification decodes into URL and property, taken from the fragment."""
    parsed = ApplicationAddress.from_spec("https://example.org/acme/app.py#main")
    assert (parsed.target, parsed.property) == ("https://example.org/acme/app.py", "main")
    assert parsed.is_url is True


@pytest.mark.parametrize(
    "spec",
    ["tests.testdata.entrypoint:main", "tests/testdata/entrypoint.py:main"],
)
def test_application_api_success(capsys, spec):
    """Loading by module name or by file path invokes the same entrypoint."""
    application = SingleFileApplication.from_spec(spec)
    application.load()
    result = application.run()

    assert result == 42
    captured = capsys.readouterr()
    assert "Räuber Hotzenplotz" in captured.out


@pytest.mark.parametrize(
    "spec",
    ["pueblo.context:pueblo_cache_path", "pueblo/context.py:pueblo_cache_path"],
)
def test_application_api_not_callable(capsys, spec):
    """Loading a property that is not callable raises a `TypeError`."""
    application = SingleFileApplication.from_spec(spec)
    with pytest.raises(TypeError) as ex:
        application.load()
    # Inspect the message after the context manager has captured the exception.
    assert ex.match("Failed to import: .+: Entrypoint is not callable")


@pytest.mark.parametrize(
    "spec",
    ["tests.testdata.entrypoint:main", "tests/testdata/entrypoint.py:main"],
)
def test_application_cli(mocker, capfd, spec):
    """The `sfa run` command invokes the entrypoint and exits successfully."""
    # Put the working directory on `PYTHONPATH` for the subprocess.
    environment = {"PYTHONPATH": str(Path.cwd())}
    mocker.patch.dict(os.environ, environment)

    command = ["sfa", "run", spec]
    subprocess.check_call(command)

    captured = capfd.readouterr()
    assert "Räuber Hotzenplotz" in captured.out


def test_application_cli_non_callable(capfd):
    """The `sfa run` command fails when the addressed property is not callable."""
    exit_code = subprocess.call(["sfa", "run", "pueblo.context:pueblo_cache_path"])

    # The error is reported as an uncaught exception on stderr, so the
    # process must also terminate with a non-zero exit status.
    assert exit_code != 0
    assert "TypeError: Failed to import: pueblo.context: Entrypoint is not callable" in capfd.readouterr().err
Empty file added tests/testdata/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions tests/testdata/entrypoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
def main():
    """Print a greeting to stdout and return 42."""
    greeting = "Hallo, Räuber Hotzenplotz."
    print(greeting)  # noqa: T201
    return 42

0 comments on commit 1873253

Please sign in to comment.