Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use async io where possible to improve runtime performance #163

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
3 changes: 2 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,5 @@ tomli==1.2.3
tqdm==4.64.0
twine==3.8.0
typed-ast==1.5.4
webencodings==0.5.1
webencodings==0.5.1
pytest-asyncio==0.21.1
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ text-unidecode==1.3
toml==0.10.2
urllib3==1.26.11
zipp==3.8.1
aiohttp==3.8.6
aiofiles==23.2.1
4 changes: 4 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ install_requires =
toml >= 0.10.0
mock >= 3.0.5
packvers >= 21.5
aiohttp >= 3.8
aiofiles >= 23.1

[options.packages.find]
where = src

Expand All @@ -86,6 +89,7 @@ testing =
black
isort
pytest-rerunfailures
pytest-asyncio >= 0.21

docs =
Sphinx>=5.0.2
Expand Down
132 changes: 95 additions & 37 deletions src/python_inspector/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@
# See https://github.com/aboutcode-org/python-inspector for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import asyncio
import os
from netrc import netrc
from typing import Dict
from typing import List
from typing import NamedTuple
from typing import Sequence
from typing import Tuple

from packageurl import PackageURL
from packvers.requirements import Requirement
Expand All @@ -26,7 +27,7 @@
from _packagedcode.pypi import PipRequirementsFileHandler
from _packagedcode.pypi import PythonSetupPyHandler
from _packagedcode.pypi import can_process_dependent_package
from python_inspector import DEFAULT_PYTHON_VERSION
from _packagedcode.pypi import get_resolved_purl
from python_inspector import dependencies
from python_inspector import utils
from python_inspector import utils_pypi
Expand All @@ -39,6 +40,7 @@
from python_inspector.resolution import get_python_version_from_env_tag
from python_inspector.resolution import get_reqs_insecurely
from python_inspector.resolution import get_requirements_from_python_manifest
from python_inspector.utils import Candidate
from python_inspector.utils_pypi import PLATFORMS_BY_OS
from python_inspector.utils_pypi import PYPI_SIMPLE_URL
from python_inspector.utils_pypi import Environment
Expand All @@ -54,7 +56,7 @@ class Resolution(NamedTuple):
``files`` is a parsed list of input file data.
"""

resolution: Dict
resolution: List[Dict]
packages: List[PackageData]
files: List[Dict]

Expand Down Expand Up @@ -295,21 +297,27 @@ def resolve_dependencies(
pdt_output=pdt_output,
analyze_setup_py_insecurely=analyze_setup_py_insecurely,
ignore_errors=ignore_errors,
verbose=verbose,
printer=printer,
)

packages = []
async def gather_pypi_data():
async def get_pypi_data(package):
data = await get_pypi_data_from_purl(
package, repos=repos, environment=environment, prefer_source=prefer_source
)

for package in purls:
packages.extend(
[
pkg.to_dict()
for pkg in list(
get_pypi_data_from_purl(
package, repos=repos, environment=environment, prefer_source=prefer_source
)
)
],
)
if verbose:
printer(f" retrieved package '{package}'")

return data

if verbose:
printer(f"retrieve package data from pypi:")

return await asyncio.gather(*[get_pypi_data(package) for package in purls])

packages = [pkg.to_dict() for pkg in asyncio.run(gather_pypi_data()) if pkg is not None]

if verbose:
printer("done!")
Expand All @@ -325,14 +333,16 @@ def resolve_dependencies(


def resolve(
direct_dependencies,
environment,
repos=tuple(),
as_tree=False,
max_rounds=200000,
pdt_output=False,
analyze_setup_py_insecurely=False,
ignore_errors=False,
direct_dependencies: List[DependentPackage],
environment: Environment,
repos: Sequence[utils_pypi.PypiSimpleRepository] = tuple(),
as_tree: bool = False,
max_rounds: int = 200000,
pdt_output: bool = False,
analyze_setup_py_insecurely: bool = False,
ignore_errors: bool = False,
verbose: bool = False,
printer=print,
):
"""
Resolve dependencies given a ``direct_dependencies`` list of
Expand All @@ -359,6 +369,8 @@ def resolve(
pdt_output=pdt_output,
analyze_setup_py_insecurely=analyze_setup_py_insecurely,
ignore_errors=ignore_errors,
verbose=verbose,
printer=printer,
)

return resolved_dependencies, packages
Expand All @@ -373,33 +385,79 @@ def get_resolved_dependencies(
pdt_output: bool = False,
analyze_setup_py_insecurely: bool = False,
ignore_errors: bool = False,
):
verbose: bool = False,
printer=print,
) -> Tuple[List[Dict], List[str]]:
"""
Return resolved dependencies of a ``requirements`` list of Requirement for
an ``enviroment`` Environment. The resolved dependencies are formatted as
an ``environment`` Environment. The resolved dependencies are formatted as
parent/children or a nested tree if ``as_tree`` is True.

Use the provided ``repos`` list of PypiSimpleRepository.
If empty, use instead the PyPI.org JSON API exclusively instead
If empty, use the PyPI.org JSON API exclusively instead.
"""
provider = PythonInputProvider(
environment=environment,
repos=repos,
analyze_setup_py_insecurely=analyze_setup_py_insecurely,
ignore_errors=ignore_errors,
)

# gather version data for all requirements concurrently in advance.

async def gather_version_data():
async def get_version_data(name: str):
versions = await provider.fill_versions_for_package(name)

if verbose:
printer(f" retrieved versions for package '{name}'")

return versions

if verbose:
printer(f"versions:")

return await asyncio.gather(
*[get_version_data(requirement.name) for requirement in requirements]
)

asyncio.run(gather_version_data())

# gather dependencies for all pinned requirements concurrently in advance.

async def gather_dependencies():
async def get_dependencies(requirement: Requirement):
purl = PackageURL(type="pypi", name=requirement.name)
resolved_purl = get_resolved_purl(purl=purl, specifiers=requirement.specifier)

if resolved_purl:
purl = resolved_purl.purl
candidate = Candidate(requirement.name, purl.version, requirement.extras)
await provider.fill_requirements_for_package(purl, candidate)

if verbose:
printer(f" retrieved dependencies for requirement '{str(purl)}'")

if verbose:
printer(f"dependencies:")

return await asyncio.gather(
*[get_dependencies(requirement) for requirement in requirements]
)

asyncio.run(gather_dependencies())

resolver = Resolver(
provider=PythonInputProvider(
environment=environment,
repos=repos,
analyze_setup_py_insecurely=analyze_setup_py_insecurely,
ignore_errors=ignore_errors,
),
provider=provider,
reporter=BaseReporter(),
)
resolver_results = resolver.resolve(
requirements=requirements, max_rounds=max_rounds)

package_list = get_package_list(results=resolver_results)
if pdt_output:
return (format_pdt_tree(resolver_results), package_list)
return (
format_resolution(resolver_results, as_tree=as_tree),
package_list,
)
return format_pdt_tree(resolver_results), package_list
return format_resolution(resolver_results, as_tree=as_tree), package_list


def get_requirements_from_direct_dependencies(
Expand Down
22 changes: 12 additions & 10 deletions src/python_inspector/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,29 @@
# See https://github.com/nexB/skeleton for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
from typing import Iterable
from typing import Mapping

from packageurl import PackageURL
from packvers.requirements import Requirement
from pip_requirements_parser import InstallRequirement

from _packagedcode import models
from _packagedcode.models import DependentPackage
from _packagedcode.pypi import PipRequirementsFileHandler
from _packagedcode.pypi import get_requirements_txt_dependencies

"""
Utilities to resolve dependencies .
Utilities to resolve dependencies.
"""

TRACE = False


def get_dependencies_from_requirements(requirements_file="requirements.txt"):
def get_dependencies_from_requirements(
requirements_file="requirements.txt",
) -> Iterable[DependentPackage]:
"""
Yield DependentPackage for each requirement in a `requirement`
file.
Yield DependentPackage for each requirement in a `requirement` file.
"""
dependent_packages, _ = get_requirements_txt_dependencies(
location=requirements_file, include_nested=True
Expand All @@ -41,21 +44,20 @@ def get_dependencies_from_requirements(requirements_file="requirements.txt"):
yield dependent_package


def get_extra_data_from_requirements(requirements_file="requirements.txt") -> Iterable[Mapping]:
    """
    Generate the ``extra_data`` of every package data entry that
    PipRequirementsFileHandler parses from the ``requirements_file`` location.
    """
    parsed_entries = PipRequirementsFileHandler.parse(location=requirements_file)
    yield from (entry.extra_data for entry in parsed_entries)


def is_requirement_pinned(requirement: Requirement) -> bool:
    """
    Return True if ``requirement`` is pinned to one exact version, that is
    when its specifier set holds exactly one specifier and that specifier uses
    the ``==`` or ``===`` operator. Return False otherwise, including when the
    requirement has no specifier at all.
    """
    specifiers = requirement.specifier
    # Coerce to bool explicitly: a bare ``and`` chain returns its first falsy
    # operand, so an empty specifier set would be returned as-is instead of
    # the ``False`` promised by the return annotation.
    return bool(
        specifiers
        and len(specifiers) == 1
        and next(iter(specifiers)).operator in {"==", "==="}
    )


def get_dependency(specifier):
def get_dependency(specifier) -> DependentPackage:
"""
Return a DependentPackage given a requirement ``specifier`` string.

Expand Down
Loading
Loading