-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: open-sourcing rxn-availability.
Signed-off-by: Matteo Manica <[email protected]>
- Loading branch information
1 parent
89df7ed
commit e2231b2
Showing
29 changed files
with
3,020 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
name: Build and publish rxn-availability on PyPI | ||
|
||
on: | ||
push: | ||
tags: | ||
- 'v*' | ||
|
||
jobs: | ||
build-and-publish: | ||
name: Build and publish rxn-availability on PyPI | ||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- uses: actions/checkout@master | ||
- name: Python setup 3.8 | ||
uses: actions/setup-python@v1 | ||
with: | ||
python-version: 3.8 | ||
- name: Install build package (for packaging) | ||
run: pip install --upgrade build | ||
- name: Build dist | ||
run: python -m build | ||
- name: Publish to PyPI | ||
uses: pypa/gh-action-pypi-publish@release/v1 | ||
with: | ||
user: __token__ | ||
password: ${{ secrets.PYPI_TOKEN }} | ||
skip_existing: true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
name: "Running tests: style, mypy, pytest" | ||
|
||
on: [push, pull_request] | ||
|
||
jobs: | ||
tests: | ||
runs-on: ubuntu-latest | ||
name: Style, mypy, pytest | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- name: Set up Python 3.9 | ||
uses: actions/setup-python@v3 | ||
with: | ||
python-version: 3.9 | ||
- name: Install Dependencies | ||
run: pip install -e ".[dev, rdkit]" | ||
- name: Install additional dependencies (for pydantic>2) | ||
run: pip install pydantic_settings | ||
- name: Check black | ||
run: python -m black --check --diff --color . | ||
- name: Check isort | ||
run: python -m isort --check --diff . | ||
- name: Check flake8 | ||
run: python -m flake8 . | ||
- name: Check mypy (on the package) | ||
run: python -m mypy --namespace-packages -p rxn.availability | ||
- name: Check mypy (on the tests) | ||
run: python -m mypy tests | ||
- name: Run pytests | ||
run: python -m pytest -sv --cov=rxn.availability --cov-fail-under=60 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
src/rxn/availability/.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
.hypothesis/ | ||
../.pytest_cache/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# celery beat schedule file | ||
celerybeat-schedule | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
|
||
# PyCharm | ||
.idea/ | ||
|
||
.coverage |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# rxn-availability | ||
|
||
## Development setup | ||
|
||
```console | ||
pip install -e ".[dev,rdkit]" | ||
``` | ||
|
||
## Example | ||
|
||
The easiest way to use the package is to rely on the `IsAvailable` object: | ||
|
||
```python | ||
from rxn.availability import IsAvailable | ||
|
||
is_available_object = IsAvailable() | ||
smiles = "B1C2CCCC1CCC2" | ||
print(f"{smiles} availability: {is_available_object(smiles)}") | ||
|
||
# BYOC: bring your own compounds | ||
compounds_filepath = "tests/example_compounds.txt" | ||
is_available_object = IsAvailable(additional_compounds_filepath=compounds_filepath) | ||
smiles = "CC(Cc1ccc(cc1)C(C(=O)O)C)C" | ||
print(f"{smiles} availability: {is_available_object(smiles)}") | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
[build-system] | ||
requires = ["setuptools >= 40.6.0", "wheel"] | ||
build-backend = "setuptools.build_meta" | ||
|
||
[tool.isort] | ||
profile = "black" | ||
|
||
[tool.mypy] | ||
check_untyped_defs = true | ||
|
||
[[tool.mypy.overrides]] | ||
module = [ | ||
"rdkit.Chem", | ||
"pymongo" | ||
] | ||
ignore_missing_imports = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
[metadata] | ||
name = rxn_availability | ||
version = attr: rxn.availability.__version__ | ||
description = Query the availability of compounds given as SMILES strings. | ||
author = IBM RXN team | ||
author_email = [email protected] | ||
long_description = file: README.md | ||
long_description_content_type = text/markdown | ||
python_requires = >= 3.7 | ||
classifiers = | ||
Operating System :: OS Independent | ||
Programming Language :: Python :: 3 | ||
Programming Language :: Python :: 3.7 | ||
Programming Language :: Python :: 3.8 | ||
Programming Language :: Python :: 3.9 | ||
Programming Language :: Python :: 3.10 | ||
Programming Language :: Python :: 3.11 | ||
Programming Language :: Python :: 3.12 | ||
|
||
[options] | ||
package_dir= | ||
=src | ||
packages=find_namespace: | ||
zip_safe = False | ||
include_package_data = False # incompatible with package_data | ||
install_requires = | ||
importlib-resources>=5.8.0 | ||
pydantic>=1.9.0 | ||
pydantic_settings>=2.1.0 | ||
pymongo>=1.3.1 | ||
rxn-chem-utils>=1.5.0 | ||
rxn-utils>=1.1.1 | ||
|
||
[options.packages.find] | ||
where=src | ||
|
||
[options.package_data] | ||
rxn = | ||
availability/py.typed | ||
availability/resources/common_compounds.txt | ||
|
||
[options.extras_require] | ||
dev = | ||
black>=22.3.0 | ||
flake8>=3.7.9 | ||
isort>=5.10.1 | ||
mypy>=0.910 | ||
pytest>=5.3.4 | ||
pytest-cov>=2.8.1 | ||
types-setuptools>=57.4.14 | ||
rdkit = | ||
# install RDKit. This is not as a setup dependency in order not to install it | ||
# in downstream packages and avoid potential conflicts with the conda | ||
# installation of RDKit | ||
rdkit>=2021.3.2 | ||
|
||
[flake8] | ||
extend-ignore = E203, E501 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Minimal packaging shim: setup() is invoked without arguments, so the
# package metadata and options are expected to come from the declarative
# configuration (setup.cfg) that sits next to this file.
import setuptools

setuptools.setup()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .is_available import AVAILABILITY_METADATA, IsAvailable # noqa: F401 | ||
|
||
__version__ = "2.0.0" # managed by bump2version |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import logging | ||
from typing import Callable, Iterable, Iterator, Optional | ||
|
||
from .smiles_availability import AvailabilityMatch, SmilesAvailability | ||
|
||
logger = logging.getLogger(__name__) | ||
logger.addHandler(logging.NullHandler()) | ||
|
||
|
||
class AvailabilityCombiner(SmilesAvailability):
    """
    Aggregate several SmilesAvailability sources behind a single object.

    A query is forwarded to every configured source, one after the other, and
    all the matches they report are returned. Going through one combiner is
    useful when availability is provided by multiple components - for
    instance, it avoids calling the standardization multiple times.
    """

    def __init__(
        self,
        sources: Iterable[SmilesAvailability],
        add_source_to_match_info_key: Optional[str] = None,
        excluded_sources: Optional[Iterable[Callable[[str], bool]]] = None,
        standardizer: Optional[Callable[[str], str]] = None,
    ):
        """
        Args:
            sources: the SmilesAvailability instances to query, in order.
            add_source_to_match_info_key: when given, each match gets a
                reference to the source that produced it, stored in the
                AvailabilityMatch info dictionary under this key.
            excluded_sources: exclusion rules, either given as an instance of
                SmilesAvailability, or as a callable function. A SMILES string
                for which any rule is truthy yields no match at all.
            standardizer: see doc in base class.
        """
        super().__init__(standardizer=standardizer)

        # Materialize the iterables: they are traversed again on every query.
        self.sources = list(sources)
        self.add_source_to_match_info_key = add_source_to_match_info_key
        self.excluded_sources = (
            list(excluded_sources) if excluded_sources is not None else []
        )

    def _find_matches(self, smiles: str) -> Iterator[AvailabilityMatch]:
        """See base class for documentation."""

        # Note: the base class has already standardized the SMILES string
        # by the time this function is reached.

        # Exclusion rules take precedence: the first rule that fires stops
        # the search before any source is queried.
        for is_excluded in self.excluded_sources:
            if is_excluded(smiles):
                logger.debug(f'SMILES "{smiles}" is unavailable due to exclusion rule.')
                return

        for source in self.sources:
            for match in source.find_matches(smiles):
                info_key = self.add_source_to_match_info_key
                if info_key is not None:
                    # Keep track of which source the match originates from.
                    match.info[info_key] = source
                yield match
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from typing import Callable, Iterator, Optional, Union | ||
|
||
from .databases import DB | ||
from .smiles_availability import AvailabilityMatch, SmilesAvailability | ||
|
||
|
||
class AvailabilityFromDatabase(SmilesAvailability):
    """
    SMILES availability backed by an instance of DB (such as MongoDB).
    """

    def __init__(
        self,
        database: DB,
        standardizer: Optional[Callable[[str], str]] = None,
        pricing_threshold: Union[int, float] = 0,
    ):
        """
        Args:
            database: DB instance that is queried for availability.
            standardizer: see doc in base class.
            pricing_threshold: threshold forwarded to the database on each
                query; it is converted to an integer beforehand.
        """
        super().__init__(standardizer=standardizer)

        self.database = database

        # NOTE: the database classes expect an integer
        self.pricing_threshold = int(pricing_threshold)

    def _find_matches(self, smiles: str) -> Iterator[AvailabilityMatch]:
        """See base class for documentation."""
        found_in_database = self.database.availability(
            smi=smiles, pricing_threshold=self.pricing_threshold
        )
        if not found_in_database:
            # Nothing to report: the generator ends without any match.
            return

        yield AvailabilityMatch(details="Found in the database.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from typing import Callable, Iterable, Iterator, Optional, Pattern | ||
|
||
from .smiles_availability import AvailabilityMatch, SmilesAvailability | ||
|
||
|
||
class AvailabilityFromRegex(SmilesAvailability):
    """
    SMILES availability determined by searching for regular expressions.
    """

    def __init__(
        self,
        regexes: Iterable[Pattern],
        standardizer: Optional[Callable[[str], str]] = None,
    ):
        """
        Args:
            regexes: compiled patterns; a SMILES string in which one of them
                is found is reported as a match for that pattern.
            standardizer: see doc in base class.
        """
        super().__init__(standardizer=standardizer)

        # Stored as a list so that the patterns can be reused for every query.
        self.available_regexes = list(regexes)

    def _find_matches(self, smiles: str) -> Iterator[AvailabilityMatch]:
        """See base class for documentation."""
        for regex in self.available_regexes:
            if not regex.search(smiles):
                continue
            # One match is reported per pattern found in the SMILES string.
            yield AvailabilityMatch(f'Matching regex "{regex.pattern}".')
Oops, something went wrong.