diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml new file mode 100644 index 0000000..912345c --- /dev/null +++ b/.github/workflows/pypi.yaml @@ -0,0 +1,28 @@ +name: Build and publish rxn-availability on PyPI + +on: + push: + tags: + - 'v*' + +jobs: + build-and-publish: + name: Build and publish rxn-availability on PyPI + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@master + - name: Python setup 3.8 + uses: actions/setup-python@v1 + with: + python-version: 3.8 + - name: Install build package (for packaging) + run: pip install --upgrade build + - name: Build dist + run: python -m build + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_TOKEN }} + skip_existing: true \ No newline at end of file diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml new file mode 100644 index 0000000..c479d2c --- /dev/null +++ b/.github/workflows/tests.yaml @@ -0,0 +1,30 @@ +name: "Running tests: style, mypy, pytest" + +on: [push, pull_request] + +jobs: + tests: + runs-on: ubuntu-latest + name: Style, mypy, pytest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.9 + uses: actions/setup-python@v3 + with: + python-version: 3.9 + - name: Install Dependencies + run: pip install -e ".[dev, rdkit]" + - name: Install additional dependencies (for pydantic>2) + run: pip install pydantic_settings + - name: Check black + run: python -m black --check --diff --color . + - name: Check isort + run: python -m isort --check --diff . + - name: Check flake8 + run: python -m flake8 . 
+ - name: Check mypy (on the package) + run: python -m mypy --namespace-packages -p rxn.availability + - name: Check mypy (on the tests) + run: python -m mypy tests + - name: Run pytests + run: python -m pytest -sv --cov=rxn.availability --cov-fail-under=60 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..aca6310 --- /dev/null +++ b/.gitignore @@ -0,0 +1,109 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +src/rxn/availability/.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +../.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# PyCharm +.idea/ + +.coverage \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..0cc8e35 --- /dev/null +++ b/README.md @@ -0,0 +1,25 @@ +# rxn-availability + +## Development setup + +```console +pip install -e ".[dev,rdkit]" +``` 
+ +## Example + +The easiest way to use the package is to rely on the `IsAvailable` object: + +```python +from rxn.availability import IsAvailable + +is_available_object = IsAvailable() +smiles = "B1C2CCCC1CCC2" +print(f"{smiles} availability: {is_available_object(smiles)}") + +# BYOC: bring your own compounds +compounds_filepath = "tests/example_compounds.txt" +is_available_object = IsAvailable(additional_compounds_filepath=compounds_filepath) +smiles = "CC(Cc1ccc(cc1)C(C(=O)O)C)C" +print(f"{smiles} availability: {is_available_object(smiles)}") +``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6210e77 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,16 @@ +[build-system] +requires = ["setuptools >= 40.6.0", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.isort] +profile = "black" + +[tool.mypy] +check_untyped_defs = true + +[[tool.mypy.overrides]] +module = [ + "rdkit.Chem", + "pymongo" +] +ignore_missing_imports = true \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..3b169c4 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,58 @@ +[metadata] +name = rxn_availability +version = attr: rxn.availability.__version__ +description = Describe your project here. 
+author = IBM RXN team +author_email = rxn4chemistry@zurich.ibm.com +long_description = file: README.md +long_description_content_type = text/markdown +python_requires = >= 3.7 +classifiers = + Operating System :: OS Independent + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 + +[options] +package_dir= + =src +packages=find_namespace: +zip_safe = False +include_package_data = False # incompatible with package_data +install_requires = + importlib-resources>=5.8.0 + pydantic>=1.9.0 + pydantic_settings>=2.1.0 + pymongo>=1.3.1 + rxn-chem-utils>=1.5.0 + rxn-utils>=1.1.1 + +[options.packages.find] +where=src + +[options.package_data] +rxn = + availability/py.typed + availability/resources/common_compounds.txt + +[options.extras_require] +dev = + black>=22.3.0 + flake8>=3.7.9 + isort>=5.10.1 + mypy>=0.910 + pytest>=5.3.4 + pytest-cov>=2.8.1 + types-setuptools>=57.4.14 +rdkit = + # install RDKit. 
This is not as a setup dependency in order not to install it + # in downstream packages and avoid potential conflicts with the conda + # installation of RDKit + rdkit>=2021.3.2 + +[flake8] +extend-ignore = E203, E501 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..6068493 --- /dev/null +++ b/setup.py @@ -0,0 +1,3 @@ +from setuptools import setup + +setup() diff --git a/src/rxn/availability/__init__.py b/src/rxn/availability/__init__.py new file mode 100644 index 0000000..e411cc9 --- /dev/null +++ b/src/rxn/availability/__init__.py @@ -0,0 +1,3 @@ +from .is_available import AVAILABILITY_METADATA, IsAvailable # noqa: F401 + +__version__ = "2.0.0" # managed by bump2version diff --git a/src/rxn/availability/availability_combiner.py b/src/rxn/availability/availability_combiner.py new file mode 100644 index 0000000..7510e32 --- /dev/null +++ b/src/rxn/availability/availability_combiner.py @@ -0,0 +1,56 @@ +import logging +from typing import Callable, Iterable, Iterator, Optional + +from .smiles_availability import AvailabilityMatch, SmilesAvailability + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + + +class AvailabilityCombiner(SmilesAvailability): + """ + Query the availability of SMILES strings by combining multiple other classes. + + This class is useful when the availability of SMILES strings is provided + by multiple components - for instance, it avoids calling the standardization + multiple times. + """ + + def __init__( + self, + sources: Iterable[SmilesAvailability], + add_source_to_match_info_key: Optional[str] = None, + excluded_sources: Optional[Iterable[Callable[[str], bool]]] = None, + standardizer: Optional[Callable[[str], str]] = None, + ): + """ + Args: + sources: instances of SmilesAvailability for the available sources. + add_source_to_match_info_key: if specified, a pointer to the source will + be added to the AvailabilityMatch info dictionary under that key. 
+ excluded_sources: sources to exclude, either given as an instance of + SmilesAvailability, or as a callable function. + standardizer: see doc in base class. + """ + super().__init__(standardizer=standardizer) + self.sources = list(sources) + self.add_source_to_match_info_key = add_source_to_match_info_key + self.excluded_sources = ( + [] if excluded_sources is None else list(excluded_sources) + ) + + def _find_matches(self, smiles: str) -> Iterator[AvailabilityMatch]: + """See base class for documentation.""" + + # Note: when it gets there, the SMILES string has already been + # standardized (in the base class). + + if any(excluded(smiles) for excluded in self.excluded_sources): + logger.debug(f'SMILES "{smiles}" is unavailable due to exclusion rule.') + return + + for source in self.sources: + for match in source.find_matches(smiles): + if self.add_source_to_match_info_key is not None: + match.info[self.add_source_to_match_info_key] = source + yield match diff --git a/src/rxn/availability/availability_from_database.py b/src/rxn/availability/availability_from_database.py new file mode 100644 index 0000000..3971af1 --- /dev/null +++ b/src/rxn/availability/availability_from_database.py @@ -0,0 +1,30 @@ +from typing import Callable, Iterator, Optional, Union + +from .databases import DB +from .smiles_availability import AvailabilityMatch, SmilesAvailability + + +class AvailabilityFromDatabase(SmilesAvailability): + """ + Query availability of SMILES strings from an instance of DB (such as MongoDB). 
+ """ + + def __init__( + self, + database: DB, + standardizer: Optional[Callable[[str], str]] = None, + pricing_threshold: Union[int, float] = 0, + ): + super().__init__(standardizer=standardizer) + + self.database = database + + # NOTE: the database classes expect an integer + self.pricing_threshold = int(pricing_threshold) + + def _find_matches(self, smiles: str) -> Iterator[AvailabilityMatch]: + """See base class for documentation.""" + if self.database.availability( + smi=smiles, pricing_threshold=self.pricing_threshold + ): + yield AvailabilityMatch(details="Found in the database.") diff --git a/src/rxn/availability/availability_from_regex.py b/src/rxn/availability/availability_from_regex.py new file mode 100644 index 0000000..d76894f --- /dev/null +++ b/src/rxn/availability/availability_from_regex.py @@ -0,0 +1,24 @@ +from typing import Callable, Iterable, Iterator, Optional, Pattern + +from .smiles_availability import AvailabilityMatch, SmilesAvailability + + +class AvailabilityFromRegex(SmilesAvailability): + """ + Query availability of SMILES strings from regex checks. 
+ """ + + def __init__( + self, + regexes: Iterable[Pattern], + standardizer: Optional[Callable[[str], str]] = None, + ): + super().__init__(standardizer=standardizer) + + self.available_regexes = list(regexes) + + def _find_matches(self, smiles: str) -> Iterator[AvailabilityMatch]: + """See base class for documentation.""" + for pattern in self.available_regexes: + if pattern.search(smiles): + yield AvailabilityMatch(f'Matching regex "{pattern.pattern}".') diff --git a/src/rxn/availability/availability_from_smarts.py b/src/rxn/availability/availability_from_smarts.py new file mode 100644 index 0000000..474880b --- /dev/null +++ b/src/rxn/availability/availability_from_smarts.py @@ -0,0 +1,30 @@ +from typing import Callable, Iterable, Iterator, Optional + +from rdkit.Chem import MolFromSmarts, MolFromSmiles + +from .smiles_availability import AvailabilityMatch, SmilesAvailability + + +class AvailabilityFromSmarts(SmilesAvailability): + """ + Query availability of SMILES strings from SMARTS matching. 
+ """ + + def __init__( + self, + smarts: Iterable[str], + standardizer: Optional[Callable[[str], str]] = None, + ): + super().__init__(standardizer=standardizer) + + self.available_smarts = [(MolFromSmarts(s), s) for s in smarts] + + def _find_matches(self, smiles: str) -> Iterator[AvailabilityMatch]: + """See base class for documentation.""" + molecule = MolFromSmiles(smiles) + if not molecule: + return + + for pattern, smarts in self.available_smarts: + if molecule.HasSubstructMatch(pattern): + yield AvailabilityMatch(details=f'Matching SMARTS "{smarts}".') diff --git a/src/rxn/availability/availability_from_smiles.py b/src/rxn/availability/availability_from_smiles.py new file mode 100644 index 0000000..3da4619 --- /dev/null +++ b/src/rxn/availability/availability_from_smiles.py @@ -0,0 +1,23 @@ +from typing import Callable, Iterable, Iterator, Optional + +from .smiles_availability import AvailabilityMatch, SmilesAvailability + + +class AvailabilityFromSmiles(SmilesAvailability): + """ + Query availability of SMILES strings from exact matches. 
+ """ + + def __init__( + self, + compounds: Iterable[str], + standardizer: Optional[Callable[[str], str]] = None, + ): + super().__init__(standardizer=standardizer) + + self.available_compounds = set(compounds) + + def _find_matches(self, smiles: str) -> Iterator[AvailabilityMatch]: + """See base class for documentation.""" + if smiles in self.available_compounds: + yield AvailabilityMatch(details=f'Matching exact SMILES, "{smiles}".') diff --git a/src/rxn/availability/config.py b/src/rxn/availability/config.py new file mode 100644 index 0000000..0a12a91 --- /dev/null +++ b/src/rxn/availability/config.py @@ -0,0 +1,26 @@ +"""Configuration for rxn-availability.""" + +from functools import lru_cache +from typing import Optional + +from pydantic import FilePath +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + """Setting class.""" + + # database configuration path + database_config_path: Optional[FilePath] = None + + model_config = SettingsConfigDict(env_prefix="RXN_") + + +@lru_cache() +def get_settings() -> Settings: + """Get cached settings. + + Returns: + rxn-availability settings. 
+ """ + return Settings() # type:ignore diff --git a/src/rxn/availability/databases.py b/src/rxn/availability/databases.py new file mode 100644 index 0000000..51f904b --- /dev/null +++ b/src/rxn/availability/databases.py @@ -0,0 +1,147 @@ +import functools +import json +import logging +from typing import Any, Dict, List, Optional + +import pymongo +from pydantic import BaseModel, ValidationError +from pymongo import MongoClient +from rxn.utilities.databases.pymongo import PyMongoSettings + +from .config import Settings, get_settings + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + + +class DBConfig(BaseModel): + uri: str + database: str + collection: str + tls_ca_certificate_path: Optional[str] = None + + +class DB: + def __init__(self, url: str): + self.url = url + + def availability(self, smi: str, pricing_threshold: int = 0) -> bool: + raise NotImplementedError("Please use MongoDB instead of the DB Base class.") + + +class MongoDB(DB): + def __init__( + self, url: str, db: str, collection: str, tls_ca_certificate_path: Optional[str] + ): # For mongo we give directly the connection url + super().__init__(url=url) + self.db = db + self.collection = collection + self.tls_ca_certificate_path = tls_ca_certificate_path + self.mongo_client: MongoClient = PyMongoSettings.instantiate_client( + mongo_uri=self.url, tls_ca_certificate_path=self.tls_ca_certificate_path + ) + + @functools.lru_cache(maxsize=2**9) + def query_by_smi(self, smi: str) -> List[Dict[str, Any]]: + """Fetch info by SMILES in the database. + + Args: + smi: the molecule to check. + + Returns: + a list of matching SMILES with the corresponding metadata. 
+ """ + + logger.debug(f"Fetch information for {smi} in database.") + + collection = self.mongo_client[self.db][self.collection] + + # Create index for smiles -- By default, indexes are created only if missing + logger.debug('Creating index for "smiles" key if it does not exist') + collection.create_index([("smiles", pymongo.HASHED)]) + + return list(collection.find({"smiles": smi})) + + def availability(self, smi: str, pricing_threshold: int = 0) -> bool: + """Determines whether or not the given molecule `smi` is commercially available, + according to our customized version of the eMolecules database. + + If a pricing threshold is given, a molecule is available only if its cheapest + numeric price is strictly below the threshold (0 or 1000 skips the price check). + + Args: + smi: the molecule to check. + pricing_threshold: the threshold in USD per g/L. + + Returns: + True if considered available, False otherwise. + """ + + logger.debug(f"Check availability for {smi} in database.") + + results = [item["price_per_amount"] for item in self.query_by_smi(smi=smi)] + + is_available = self._availability_from_db_results(results, pricing_threshold) + logger.debug( + f"Done checking for {smi} in database (is_available: {str(is_available)})." + ) + return is_available + + def _availability_from_db_results( + self, prices: List[Any], pricing_threshold: int + ) -> bool: + # False if nothing found in the DB + if len(prices) == 0: + return False + + # True if pricing threshold not set or set to max + if pricing_threshold == 0 or pricing_threshold == 1000: + return True + + # keep only the numbers (DB contains also "NA") + valid_prices = [p for p in prices if isinstance(p, (int, float))] + + # False if no price left + if len(valid_prices) == 0: + return False + + # True if the lowest price is under the threshold + return min(valid_prices) < pricing_threshold + + +def initialize_databases_from_environment_variables() -> Dict[str, DB]: + """Initialize databases for availability from the environment. 
+ + This function uses the Settings class to read from the environment a path to a configuration file in JSON format + that stores the URI and connection information to instantiate databases used for availability. + + Returns: + a dictionary of database names and DB object. + """ + # Settings + settings: Settings = get_settings() + # Databases + databases: Dict[str, DB] = {} + if settings.database_config_path is None: + logger.warning( + "No database configuration provided! Not using database sources." + ) + else: + with open(settings.database_config_path) as json_file: + database_config = json.load(json_file) + for database in database_config: + try: + db_config = DBConfig.model_validate(database_config[database]) + except ValidationError: + logger.error( + f"Database configuration problem. Check if the file at {settings.database_config_path} has the right configuration format for all databases." + ) + raise + new_db = MongoDB( + url=db_config.uri, + db=db_config.database, + collection=db_config.collection, + tls_ca_certificate_path=db_config.tls_ca_certificate_path, + ) + databases[database] = new_db + return databases diff --git a/src/rxn/availability/defaults.py b/src/rxn/availability/defaults.py new file mode 100644 index 0000000..f6ea8b8 --- /dev/null +++ b/src/rxn/availability/defaults.py @@ -0,0 +1,102 @@ +import re +from pathlib import Path +from typing import List, Pattern, Set + +import importlib_resources + + +def default_available_regexes() -> List[Pattern]: + """Get regex patterns for always available compounds. + + Returns: + a list of regex patterns. 
+ """ + return [ + # Get all ions + re.compile(r"^\[\w{1,3}[+-]\d?\]$"), + # Get Single and double elements (e.g: O2) + re.compile(r"^([A-Z][a-z]?){1,2}$"), + # Get Single and double elements in squared parentheses + re.compile(r"^(\[[A-Z][a-z]?\]){1,2}$"), + # Matches stuff like [HH], [BrBr] + re.compile(r"^\[([A-Z][a-z]?){1,2}\]$"), + re.compile(r"^[A-Z].?[A-Z]$"), + ] + + +def default_available_smarts_patterns() -> List[str]: + """Get SMARTS patterns for always available compounds. + + Returns: + a list of SMARTS. + """ + # NOTE: biochemical reaction cofactors + return [ + "O=C(NCC*)CCNC(=O)C(O)C(C)(C)COP(=O)(*)OP(=O)(*)OC*3O*(n2cnc1c(ncnc12)N)*(O)*3OP(=O)(*)*", + "**1*(*)*(COP(*)(=O)OP(*)(=O)OC*2O*(*)*(*)*2*)O*1*", + "**1*(*)*(O*1COP(*)(=O)O)[R]", + "*P(*)(=O)O*1*(*)*(*)O[*]1COP(*)(*)=O", + "**1*(*)*(O*1CS*)[R]", + "**1**2**3*(**(=O)**3=O)*(*)*2**1*", + "*~1~*~*~2~*~*~1~*~*~1~*~*~*(~*~*~3~*~*~*(~*~*~4~*~*~*(~*~2)~*~4)~*~3)~*~1", + "S1[Fe]S[Fe]1", + ] + + +def common_biochemical_byproducts() -> Set[str]: + """Get common biochemical compounds that are not part of a commercial compound database. + + Returns: + a set of SMILES. + """ + return { + "O=P([O-])([O-])[O-]", + "O=P([O-])([O-])O", + "C[N+](C)(C)CCO", + "NCCO", + "O=P([O-])([O-])OP(=O)([O-])[O-]", + "O=P([O-])([O-])OP(=O)([O-])O", + "O=C([O-])CCC(=O)C(=O)[O-]", + "CC(=O)[O-]", + "CC(=O)C(=O)[O-]", + } + + +def default_available_compounds() -> Set[str]: + """Get common available compounds that are not part of a commercial compound database. + + Returns: + a set of SMILES. + """ + return _get_compounds_from_packaged_file("common_compounds.txt") + + +def _get_compounds_from_packaged_file(packaged_file_name: str) -> Set[str]: + """Get the compounds contained in a file that is packaged with rxn-availability. + + Args: + packaged_file_name: file containing one SMILES per line, and potentially + some lines for comments (starting with "#"). This file must be listed + in setup.cfg! + + Returns: + a set of SMILES. 
+ """ + return get_compounds_from_file( + importlib_resources.files(__package__) / "resources" / packaged_file_name + ) + + +def get_compounds_from_file(file_path: Path) -> Set[str]: + """Get the compounds from file. + + Args: + file_path: file containing one SMILES per line, and potentially + some lines for comments (starting with "#"). + + Returns: + a set of SMILES. + """ + with open(file_path) as fp: + raw_text: str = fp.read() + return {line for line in raw_text.splitlines() if not line.startswith("#")} diff --git a/src/rxn/availability/is_available.py b/src/rxn/availability/is_available.py new file mode 100644 index 0000000..22fef93 --- /dev/null +++ b/src/rxn/availability/is_available.py @@ -0,0 +1,323 @@ +import logging +from collections import OrderedDict +from pathlib import Path +from typing import Callable, Dict, Iterable, List, Optional, Set, Union + +from rxn.chemutils.smiles_standardization import standardize_molecules + +from .availability_combiner import AvailabilityCombiner +from .availability_from_database import AvailabilityFromDatabase +from .availability_from_regex import AvailabilityFromRegex +from .availability_from_smarts import AvailabilityFromSmarts +from .availability_from_smiles import AvailabilityFromSmiles +from .databases import initialize_databases_from_environment_variables +from .defaults import ( + common_biochemical_byproducts, + default_available_compounds, + default_available_regexes, + default_available_smarts_patterns, + get_compounds_from_file, +) +from .smiles_availability import AvailabilityMatch, SmilesAvailability +from .utils import wrap_standardizer_with_tilde_substitution + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + +AVAILABILITY_METADATA = OrderedDict( + [ + ( + "common", + {"color": "#002d9c", "label": "Common molecule available by default"}, + ), + ( + "model", + { + "color": "#0f62fe", + "label": "Molecule available using a model-specific database", + }, + ), + ( + 
"emolecules", + { + "color": "#28a30d", + "label": "Molecule commercially available on eMolecules.com", + }, + ), + ( + "database", + { + "color": "#3ddbd9", + "label": "Molecule commercially available from a database", + }, + ), + ( + "unavailable", + {"color": "#ce4e04", "label": "Not able to find a synthetic path"}, + ), + ("from_file", {"color": "#f1c21b", "label": "Molecule from file"}), + ] +) + + +def default_standardize_molecules(smiles: str) -> str: + """Standardize molecules. + + Args: + smiles (str): standardize molecules with defaults. + + Returns: + standardized molecules. + """ + return standardize_molecules( + smiles, + canonicalize=True, + sanitize=True, + inchify=False, + fragment_bond="~", + ordered_precursors=True, + molecule_token_delimiter=None, + is_enzymatic=False, + enzyme_separator="|", + ) + + +class IsAvailable: + """ + Class handling the availability of compounds. + + Combines different sources for availability and exclusion, mixing default + (hard-coded) values, availability databases, and user-provided input. + """ + + def __init__( + self, + pricing_threshold: int = 0, + always_available: Optional[List[str]] = None, + model_available: Optional[Iterable[str]] = None, + excluded: Optional[List[str]] = None, + avoid_substructure: Optional[List[str]] = None, + are_materials_exclusive: bool = False, + standardization_function: Callable[[str], str] = default_standardize_molecules, + additional_compounds_filepath: Optional[Union[Path, str]] = None, + ) -> None: + """ + Initialize the availability probing object. + + Args: + always_available: list of always available compounds. + model_available: compounds available for the selected model. + excluded: excluded compounds. + avoid_substructure: substructures to avoid. + are_materials_exclusive: flag indicating whether the materials + provided by users are replacing (True) or complementing (False) the available + compounds associated to a model and the ones from the availability database. 
+ Defaults to False, a.k.a., complementing the list. + standardization_function: function to standardize a SMILES string. + It handles multiple molecule separated with '.' as well as '~' fragment bonds. + additional_compounds_filepath: path to compounds to add to the available ones + from a custom file source. + """ + self.standardization_function = wrap_standardizer_with_tilde_substitution( + standardization_function + ) + self.are_materials_exclusive = are_materials_exclusive + + additional_compounds_from_filepath: Set[str] = set() + if additional_compounds_filepath is not None: + additional_compounds_from_filepath = { + standardization_function(smiles) + for smiles in get_compounds_from_file( + Path(additional_compounds_filepath) + ) + } + + # Compounds available by default + self.from_default_compounds = AvailabilityFromSmiles( + default_available_compounds() + | common_biochemical_byproducts() + | additional_compounds_from_filepath + ) + self.from_default_regexes = AvailabilityFromRegex(default_available_regexes()) + self.from_default_smarts = AvailabilityFromSmarts( + default_available_smarts_patterns() + ) + + # User and model available compounds + self.from_user = AvailabilityFromSmiles(self._ensure_iterable(always_available)) + self.from_model = AvailabilityFromSmiles(self._ensure_iterable(model_available)) + + # Database compounds + self._pricing_threshold = pricing_threshold + self.from_database = { + database_name: AvailabilityFromDatabase( + database, pricing_threshold=self._pricing_threshold + ) + for database_name, database in initialize_databases_from_environment_variables().items() + } + + # Excluded compounds + self.excluded_compounds = AvailabilityFromSmiles( + self._ensure_iterable(excluded) + ) + self.excluded_substructures = AvailabilityFromSmarts( + self._ensure_iterable(avoid_substructure) + ) + + # Under which key the instance of SmilesAvailability will be stored in the + # 'info' dict of AvailabilityMatch. 
+ self.key_for_source_instance = "smilesavailability_instance" + + def _ensure_iterable( + self, optional_iterable: Optional[Iterable[str]] + ) -> Iterable[str]: + """For optional iterables, replace the None value by an empty list.""" + if optional_iterable is None: + return [] + return optional_iterable + + def __call__(self, smiles: str) -> bool: + """ + Inquire the availability of a SMILES string. + + Args: + smiles: SMILES string for which the availability is needed. + + Returns: + False if the SMILES string is not available, True otherwise. + """ + first_match = self._get_first_availability_match(smiles) + if first_match is None: + logger.debug(f'SMILES "{smiles}" is not available.') + return False + + logger.debug(f'SMILES "{smiles}" is available: {first_match.details}.') + return True + + def get_availability_metadata(self, smiles: str) -> Dict: + """Get availability metadata given a SMILES string. + + Args: + smiles: SMILES string for which the availability metadata are needed. + + Returns: + metadata on availability. + """ + + match = self._get_first_availability_match(smiles) + if match is None: + availability_metadata_key = "unavailable" + else: + source = match.info[self.key_for_source_instance] + availability_metadata_key = self._source_to_category(source) + + return AVAILABILITY_METADATA[availability_metadata_key] + + def is_expandable(self, smiles: str) -> bool: + """ + Get expandability given a SMILES. + + Args: + smiles: SMILES string for which the expandable information is needed. + + Returns: + whether the molecule is expandable. + """ + + # Note: this does the same thing as the original implementation - maybe, + # it will be necessary to review this behavior at some point. For instance, + # it does not consider the default SMARTS strings. 
+ sources = [ + self.from_default_compounds, + self.from_default_regexes, + self.from_user, + ] + match = self._get_first_availability_match( + smiles, sources=sources, excluded_sources=[] + ) + + # if there is no match, it means that the molecule is expandable. + return match is None + + def _get_first_availability_match( + self, + smiles: str, + sources: Optional[Iterable[SmilesAvailability]] = None, + excluded_sources: Optional[Iterable[SmilesAvailability]] = None, + ) -> Optional[AvailabilityMatch]: + """ + Get the first availability match (None if nothing found). + + Args: + sources: availability sources to consider. Defaults to all the sources + to consider in this class (default compounds, user compounds, + database, etc.). + excluded_sources: sources to exclude. Defaults to the excluded compounds + and substructures. + """ + if sources is None: + sources = [ + self.from_default_compounds, + self.from_default_regexes, + self.from_default_smarts, + self.from_user, + ] + if not self.are_materials_exclusive: + sources.append(self.from_model) + sources.extend(self.from_database.values()) + if excluded_sources is None: + excluded_sources = [ + self.excluded_compounds, + self.excluded_substructures, + ] + + availability_combiner = AvailabilityCombiner( + sources=sources, + add_source_to_match_info_key=self.key_for_source_instance, + excluded_sources=excluded_sources, + standardizer=self.standardization_function, + ) + + return availability_combiner.first_match(smiles) + + def _source_to_category(self, source: SmilesAvailability) -> str: + """ + Get the category corresponding to a SmilesAvailability instance. + + Corresponds to the key being used in AVAILABILITY_METADATA. 
+ """ + if any( + source is s + for s in [ + self.from_default_compounds, + self.from_default_regexes, + self.from_default_smarts, + self.from_user, + ] + ): + return "common" + + if source is self.from_model: + return "model" + + for key, value in self.from_database.items(): + if source is value: + return key if key == "emolecules" else "database" + + raise ValueError(f'Cannot get category for source "{source}"') + + @property + def pricing_threshold(self) -> int: + """ + Returns the current value of this object's pricing threshold (USD per g/L). + """ + return self._pricing_threshold + + @pricing_threshold.setter + def pricing_threshold(self, value: int): + """ + Sets the value of this object's pricing threshold to the given value (in USD per g/L) + """ + self._pricing_threshold = value + for database in self.from_database.values(): + database.pricing_threshold = self._pricing_threshold diff --git a/src/rxn/availability/py.typed b/src/rxn/availability/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/src/rxn/availability/resources/__init__.py b/src/rxn/availability/resources/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/rxn/availability/resources/common_compounds.txt b/src/rxn/availability/resources/common_compounds.txt new file mode 100644 index 0000000..883e9ea --- /dev/null +++ b/src/rxn/availability/resources/common_compounds.txt @@ -0,0 +1,1698 @@ +# +# List of common compounds that should be considered always available. +# This list was compiled in 2018/2019. 
+# +B1C2CCCC1CCC2 +BrB(Br)Br +BrC(Br)(Br)Br +BrC(Br)Br +BrC1CCCC1 +BrCBr +BrCC1CC1 +BrCC1CCC1 +BrCC1CCCCC1 +BrCC1CCCCO1 +BrCC1CO1 +BrCCBr +BrCCCBr +BrCCCCBr +BrCCCCCBr +BrCCCCCCBr +BrCCCc1ccccc1 +BrCCOC1CCCCO1 +BrCCOCCBr +BrCCc1ccccc1 +BrCc1ccc(Br)cc1 +BrCc1ccc2ccccc2c1 +BrCc1cccc(Br)c1 +BrCc1ccccc1 +BrCc1ccccc1Br +BrCc1ccccn1 +BrP(Br)Br +Br[Cu]Br +Br[Mg]C1CC1 +Br[Mg]c1ccccc1 +Br[P+](N1CCCC1)(N1CCCC1)N1CCCC1 +Brc1ccc(-c2ccccc2)cc1 +Brc1ccc(Br)cc1 +Brc1ccc(Br)nc1 +Brc1ccc(I)cc1 +Brc1ccc2[nH]ccc2c1 +Brc1ccc2ccc3c(Br)ccc4ccc1c2c43 +Brc1cccc(Br)c1 +Brc1cccc(Br)n1 +Brc1cccc(I)c1 +Brc1ccccc1 +Brc1ccccn1 +Brc1cccnc1 +Brc1cccs1 +Brc1ccsc1 +Brc1cn[nH]c1 +Brc1cnc2[nH]ccc2c1 +Brc1cncc(Br)c1 +Brc1cncnc1 +Brc1ncccn1 +Brc1nccs1 +C#CC(=O)O +C#CC(=O)OC +C#CC(=O)OCC +C#CC(C)(C)C +C#CC(C)(C)N +C#CC(C)(C)O +C#CC1CC1 +C#CCBr +C#CCCO +C#CCN +C#CCO +C#C[C@]1(O)CCN(C)C1=O +C#C[Mg]Br +C#C[Si](C)(C)C +C#Cc1ccc(F)cc1 +C#Cc1cccc(N)c1 +C#Cc1ccccc1 +C#Cc1ccccn1 +C(=NC1CCCCC1)=NC1CCCCC1 +C/C(=N\\[Si](C)(C)C)O[Si](C)(C)C +C/C=C/C=O +C1=CC2C3C=CC(C3)C2C1 +C1=CC2CCC1C2 +C1=CCC=C1 +C1=CCC=CC1 +C1=CCCCC1 +C1=COCCC1 +C1=Cc2ccccc2C1 +C1CC2(CCN1)OCCO2 +C1CCC(NC2CCCCC2)CC1 +C1CCC(P(C2CCCCC2)C2CCCCC2)CC1 +C1CCC2=NCCCN2CC1 +C1CCCCC1 +C1CCN(C2CCNCC2)CC1 +C1CCNC1 +C1CCNCC1 +C1CCOC1 +C1CN2CCN1CC2 +C1CNC1 +C1CNCCN1 +C1CNCCNC1 +C1CO1 +C1COCCN1 +C1COCCO1 +C1COCCOCCOCCOCCO1 +C1COCCOCCOCCOCCOCCO1 +C1CSCCN1 +C1N2CN3CN1CN(C2)C3 +C=C(C)B1OC(C)(C)C(C)(C)O1 +C=C(C)C +C=C(C)C(=O)Cl +C=C(C)C(=O)O +C=C(C)C(=O)OC +C=C(C)C(=O)OC(=O)C(=C)C +C=C(C)C(=O)OCC1CO1 +C=C(C)C(=O)OCCO +C=C(C)CBr +C=C(C)CCl +C=C(C)c1ccccc1 +C=C(OCC)[Sn](CCCC)(CCCC)CCCC +C=C1CC(=O)O1 +C=CB1OC(C)(C)C(C)(C)O1 +C=CC +C=CC#N +C=CC(=C)C +C=CC(=O)Cl +C=CC(=O)O +C=CC(=O)OC +C=CC(=O)OC(C)(C)C +C=CC(=O)OCC +C=CC(=O)OCCCC +C=CC(=O)OCCO +C=CC(C)=O +C=CC(N)=O +C=CC=C +C=CC=O +C=CCBr +C=CCC +C=CCCBr +C=CCCCC +C=CCCCCCC +C=CCCO +C=CCCl +C=CCI +C=CCN +C=CCO +C=CCOC(=O)Cl +C=CC[Mg]Br +C=CC[Sn](CCCC)(CCCC)CCCC +C=CN1CCCC1=O +C=COC(C)=O +C=COCC +C=COCCCC 
+C=C[B-](F)(F)F +C=C[Mg]Br +C=C[Sn](CCCC)(CCCC)CCCC +C=Cc1ccccc1 +C=[N+]=[N-] +CB(O)O +CB1OB(C)OB(C)O1 +CC#N +CC(=N)N +CC(=O)C#N +CC(=O)C(=O)O +CC(=O)C(C)(C)C +CC(=O)C(C)C +CC(=O)C1CC1 +CC(=O)CC(C)=O +CC(=O)CC(C)C +CC(=O)CCC(C)=O +CC(=O)CCl +CC(=O)Cl +CC(=O)N(C)C +CC(=O)N1CCNCC1 +CC(=O)NN +CC(=O)Nc1ccc(O)cc1 +CC(=O)O +CC(=O)OC(C)(C)C +CC(=O)OC(C)=O +CC(=O)OC(C)C +CC(=O)OCC(=O)Cl +CC(=O)OCC1=C(C(=O)O)N2C(=O)[C@@H](N)[C@H]2SC1 +CC(=O)OCCBr +CC(=O)OCc1c2ccccc2c(COC(C)=O)c2ccccc12 +CC(=O)OI1(OC(C)=O)(OC(C)=O)OC(=O)c2ccccc21 +CC(=O)OO +CC(=O)OOC(C)=O +CC(=O)O[BH-](OC(C)=O)OC(C)=O +CC(=O)O[Hg]OC(C)=O +CC(=O)O[IH2](OC(C)=O)c1ccccc1 +CC(=O)O[K] +CC(=O)O[Na] +CC(=O)O[Pd]OC(C)=O +CC(=O)[O-] +CC(=O)c1ccc(Br)cc1 +CC(=O)c1ccc(F)cc1 +CC(=O)c1ccc(N)cc1 +CC(=O)c1ccc(O)cc1 +CC(=O)c1ccccc1 +CC(Br)Br +CC(C)(Br)C(=O)Br +CC(C)(C#N)N=NC(C)(C)C#N +CC(C)(C)C(=O)CBr +CC(C)(C)C(=O)CC#N +CC(C)(C)C(=O)Cl +CC(C)(C)C(=O)O +CC(C)(C)C(=O)OCCl +CC(C)(C)C=O +CC(C)(C)CC(=O)Cl +CC(C)(C)CC=O +CC(C)(C)CN +CC(C)(C)N +CC(C)(C)N=C=O +CC(C)(C)O +CC(C)(C)OC(=O)/N=N/C(=O)OC(C)(C)C +CC(C)(C)OC(=O)CBr +CC(C)(C)OC(=O)CN +CC(C)(C)OC(=O)N1CC(=O)C1 +CC(C)(C)OC(=O)N1CC(N)C1 +CC(C)(C)OC(=O)N1CC(O)C1 +CC(C)(C)OC(=O)N1CC=C(B2OC(C)(C)C(C)(C)O2)CC1 +CC(C)(C)OC(=O)N1CCC(=O)C1 +CC(C)(C)OC(=O)N1CCC(=O)CC1 +CC(C)(C)OC(=O)N1CCC(C(=O)O)CC1 +CC(C)(C)OC(=O)N1CCC(C=O)CC1 +CC(C)(C)OC(=O)N1CCC(CN)CC1 +CC(C)(C)OC(=O)N1CCC(CO)CC1 +CC(C)(C)OC(=O)N1CCC(N)CC1 +CC(C)(C)OC(=O)N1CCC(O)CC1 +CC(C)(C)OC(=O)N1CCC(OS(C)(=O)=O)CC1 +CC(C)(C)OC(=O)N1CCCNCC1 +CC(C)(C)OC(=O)N1CCC[C@@H](N)C1 +CC(C)(C)OC(=O)N1CCC[C@H]1C(=O)O +CC(C)(C)OC(=O)N1CCNCC1 +CC(C)(C)OC(=O)N1CC[C@@H](N)C1 +CC(C)(C)OC(=O)N1CC[C@H](N)C1 +CC(C)(C)OC(=O)N1C[C@H](O)C[C@H]1C(=O)O +CC(C)(C)OC(=O)N=NC(=O)OC(C)(C)C +CC(C)(C)OC(=O)NC(C)(C)C(=O)O +CC(C)(C)OC(=O)NC1CCNC1 +CC(C)(C)OC(=O)NC1CCNCC1 +CC(C)(C)OC(=O)NCC(=O)O +CC(C)(C)OC(=O)NCCBr +CC(C)(C)OC(=O)NCCC(=O)O +CC(C)(C)OC(=O)NCCCBr +CC(C)(C)OC(=O)NCCCN +CC(C)(C)OC(=O)NCCN +CC(C)(C)OC(=O)NCCO +CC(C)(C)OC(=O)NN 
+CC(C)(C)OC(=O)N[C@@H](Cc1ccccc1)C(=O)O +CC(C)(C)OC(=O)N[C@@H]1CCCNC1 +CC(C)(C)OC(=O)N[C@@H]1CCNC1 +CC(C)(C)OC(=O)N[C@H](C(=O)O)C(C)(C)C +CC(C)(C)OC(=O)N[C@H]1CCCC[C@H]1N +CC(C)(C)OC(=O)N[C@H]1CCNC1 +CC(C)(C)OC(=O)OC(=O)OC(C)(C)C +CC(C)(C)OC(=O)OC(C)(C)C +CC(C)(C)OC(=O)c1ccc(N)cc1 +CC(C)(C)OC(=O)n1cc(B2OC(C)(C)C(C)(C)O2)cn1 +CC(C)(C)OC(N)=O +CC(C)(C)OCl +CC(C)(C)ON=O +CC(C)(C)OO +CC(C)(C)O[K] +CC(C)(C)O[Na] +CC(C)(C)P(C(C)(C)C)C(C)(C)C +CC(C)(C)P([c-]1cccc1)C(C)(C)C +CC(C)(C)P(c1ccccc1-c1ccccc1)C(C)(C)C +CC(C)(C)S +CC(C)(C)S(N)=O +CC(C)(C)[Mg]Cl +CC(C)(C)[O-] +CC(C)(C)[PH+](C(C)(C)C)C(C)(C)C +CC(C)(C)[P]([Pd][P](C(C)(C)C)(C(C)(C)C)C(C)(C)C)(C(C)(C)C)C(C)(C)C +CC(C)(C)[S@@](N)=O +CC(C)(C)[S@](N)=O +CC(C)(C)[Si](C)(C)Cl +CC(C)(C)[Si](C)(C)OCC=O +CC(C)(C)[Si](C)(C)OCCBr +CC(C)(C)[Si](C)(C)OS(=O)(=O)C(F)(F)F +CC(C)(C)[Si](Cl)(c1ccccc1)c1ccccc1 +CC(C)(C)c1ccc(B(O)O)cc1 +CC(C)(C)c1ccc(C(=O)Cl)cc1 +CC(C)(C)c1ccc(N)cc1 +CC(C)(C)c1ccc(O)cc1 +CC(C)(C)c1ccc(S(=O)(=O)Cl)cc1 +CC(C)(CO)CO +CC(C)(N)CO +CC(C)(O)C#N +CC(C)(O)C(C)(C)O +CC(C)(O)CN +CC(C)(c1ccc(O)cc1)c1ccc(O)cc1 +CC(C)=C(Cl)N(C)C +CC(C)=CCBr +CC(C)=O +CC(C)Br +CC(C)C(=O)Cl +CC(C)C(=O)Nc1cccc(C2CCNCC2)c1 +CC(C)C(=O)O +CC(C)C(=O)OC(=O)C(C)C +CC(C)C(C)BC(C)C(C)C +CC(C)C=O +CC(C)CBr +CC(C)CC(=O)Cl +CC(C)CC=O +CC(C)CCBr +CC(C)CCN +CC(C)CCO +CC(C)CCON=O +CC(C)CI +CC(C)CN +CC(C)CO +CC(C)COC(=O)Cl +CC(C)C[Al+]CC(C)C +CC(C)C[AlH]CC(C)C +CC(C)C[C@H](N)C(=O)O +CC(C)C[C@H](NC(=O)OC(C)(C)C)C(=O)O +CC(C)I +CC(C)N +CC(C)N1CCNCC1 +CC(C)N=C=NC(C)C +CC(C)N=C=O +CC(C)NC(C)C +CC(C)O +CC(C)OB(OC(C)C)OC(C)C +CC(C)OB1OC(C)(C)C(C)(C)O1 +CC(C)OC(=O)/N=N/C(=O)OC(C)C +CC(C)OC(=O)Cl +CC(C)OC(=O)N=NC(=O)OC(C)C +CC(C)OC(C)C +CC(C)O[Ti](OC(C)C)(OC(C)C)OC(C)C +CC(C)Oc1cccc(OC(C)C)c1-c1ccccc1P(C1CCCCC1)C1CCCCC1 +CC(C)S(=O)(=O)Cl +CC(C)[C@H](N)C(=O)O +CC(C)[C@H](NC(=O)OC(C)(C)C)C(=O)O +CC(C)[C@H](NC(=O)OCc1ccccc1)C(=O)O +CC(C)[Mg]Br +CC(C)[Mg]Cl +CC(C)[N-]C(C)C +CC(C)[O-] +CC(C)[Si](Cl)(C(C)C)C(C)C 
+CC(C)c1cc(C(C)C)c(-c2ccccc2P(C(C)(C)C)C(C)(C)C)c(C(C)C)c1 +CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)C2CCCCC2)c(C(C)C)c1 +CC(C)c1cccc(C(C)C)c1N +CC(Cl)Cl +CC(Cl)OC(=O)Cl +CC(N)=O +CC(N)=S +CC(N)CO +CC(O)=S +CC(O)C(=O)O +CC(O)CN +CC(O)CO +CC([O-])=S +CC1(C)C(=O)N(Br)C(=O)N1Br +CC1(C)C2CCC1(CS(=O)(=O)O)C(=O)C2 +CC1(C)CC(=O)CC(=O)C1 +CC1(C)CCCC(C)(C)N1 +CC1(C)CCCC(C)(C)N1O +CC1(C)CCCC(C)(C)N1[O] +CC1(C)CO1 +CC1(C)COB(B2OCC(C)(C)CO2)OC1 +CC1(C)OB(B2OC(C)(C)C(C)(C)O2)OC1(C)C +CC1(C)OB(C2=CCOCC2)OC1(C)C +CC1(C)OB(c2ccc(N)cc2)OC1(C)C +CC1(C)OB(c2ccc(N)nc2)OC1(C)C +CC1(C)OB(c2ccc(O)cc2)OC1(C)C +CC1(C)OB(c2cn[nH]c2)OC1(C)C +CC1(C)OB(c2cnc(N)nc2)OC1(C)C +CC1(C)OBOC1(C)C +CC1(C)OC(=O)CC(=O)O1 +CC1(C)OCC(CO)O1 +CC1(C)c2cccc(P(c3ccccc3)c3ccccc3)c2Oc2c(P(c3ccccc3)c3ccccc3)cccc21 +CC1CCCO1 +CC1CCNCC1 +CC1CO1 +CC=C(C)C +CC=O +CCB(CC)CC +CCBr +CCC#N +CCC(=O)CC +CCC(=O)Cl +CCC(=O)O +CCC(=O)OC(=O)CC +CCC(=O)OO +CCC(=O)c1ccccc1 +CCC(C)(C)O +CCC(C)=O +CCC(C)O +CCC(C)[BH-](C(C)CC)C(C)CC +CCC=O +CCCBr +CCCC#N +CCCC(=O)Cl +CCCC(=O)O +CCCC(=O)OC(=O)CCC +CCCC(=O)OO +CCCC=O +CCCCBr +CCCCC +CCCCC(=O)Cl +CCCCC(CC)CO +CCCCC=O +CCCCCBr +CCCCCC +CCCCCC(=O)Cl +CCCCCC=O +CCCCCCBr +CCCCCCC +CCCCCCCC(=O)Cl +CCCCCCCC/C=C\\CCCCCCCC(=O)O +CCCCCCCCBr +CCCCCCCCCCCC(=O)Cl +CCCCCCCCCCCC(=O)[O-] +CCCCCCCCCCCCBr +CCCCCCCCCCCCCCCC(=O)Cl +CCCCCCCCCCCCCCCCCC(=O)O +CCCCCCCCCCCCN +CCCCCCCCCCCCOS(=O)(=O)[O-] +CCCCCCCCCCCCS +CCCCCCCCN +CCCCCCCCO +CCCCCCCC[N+](C)(CCCCCCCC)CCCCCCCC +CCCCCCN +CCCCCCO +CCCCCN +CCCCCO +CCCCI +CCCCN +CCCCN(CCCC)CCCC +CCCCN=C=O +CCCCNCCCC +CCCCO +CCCCOC(C)=O +CCCCOCCCC +CCCCP(=CC#N)(CCCC)CCCC +CCCCP(C12CC3CC(CC(C3)C1)C2)C12CC3CC(CC(C3)C1)C2 +CCCCP(CCCC)CCCC +CCCC[Mg+] +CCCC[Mg]Cl +CCCC[N+](CCCC)(CCCC)CCCC +CCCC[Sn+2]CCCC +CCCC[SnH](CCCC)CCCC +CCCC[Sn](=O)CCCC +CCCC[Sn](CCCC)(CCCC)N=[N+]=[N-] +CCCC[Sn](CCCC)(CCCC)c1ccccn1 +CCCC[Sn](CCCC)(CCCC)c1cccs1 +CCCC[Sn](Cl)(CCCC)CCCC +CCCI +CCCN +CCCNCCC +CCCO +CCCP(=O)=O +CCCP1(=O)OP(=O)(CCC)OP(=O)(CCC)O1 +CCCS +CCCS(=O)(=O)Cl +CCC[Mg+] 
+CCC[N+](CCC)(CCC)CCC +CCN(C(C)C)C(C)C +CCN(CC)C(=O)Cl +CCN(CC)C(C)C +CCN(CC)CC +CCN(CC)CCN +CCN(CC)S(F)(F)F +CCN(CC)c1ccccc1 +CCN1CCNCC1 +CCN1CCOCC1 +CCN=C=NCCCN(C)C +CCN=C=O +CCNC +CCNCC +CCOC(=O)/N=N/C(=O)OCC +CCOC(=O)C(=O)CBr +CCOC(=O)C(=O)Cl +CCOC(=O)C(=O)OCC +CCOC(=O)C(C#N)=NOC(N(C)C)=[N+](C)C +CCOC(=O)C(C#N)=NOC(N1CCOCC1)=[N+](C)C +CCOC(=O)C(C)(C)Br +CCOC(=O)C(C)=O +CCOC(=O)C(C)Br +CCOC(=O)C(C)C +CCOC(=O)C(C)C(=O)OCC +CCOC(=O)C(Cl)C(C)=O +CCOC(=O)C(F)(F)Br +CCOC(=O)C(F)(F)F +CCOC(=O)C(NC(C)=O)C(=O)OCC +CCOC(=O)C1CCC(=O)CC1 +CCOC(=O)C1CCNCC1 +CCOC(=O)C=O +CCOC(=O)C=P(c1ccccc1)(c1ccccc1)c1ccccc1 +CCOC(=O)C=[N+]=[N-] +CCOC(=O)CBr +CCOC(=O)CC +CCOC(=O)CC#N +CCOC(=O)CC(=O)C(F)(F)F +CCOC(=O)CC(=O)CCl +CCOC(=O)CC(=O)Cl +CCOC(=O)CC(=O)OCC +CCOC(=O)CC(=O)[O-] +CCOC(=O)CC(=O)c1ccccc1 +CCOC(=O)CC(C)=O +CCOC(=O)CCBr +CCOC(=O)CCCBr +CCOC(=O)CCCCBr +CCOC(=O)CCN +CCOC(=O)CCl +CCOC(=O)CN +CCOC(=O)CN=C=O +CCOC(=O)CO +CCOC(=O)CP(=O)(OCC)OCC +CCOC(=O)CS +CCOC(=O)CSc1cnc(N)s1 +CCOC(=O)Cl +CCOC(=O)N1CCNCC1 +CCOC(=O)N1c2ccccc2C=CC1OCC +CCOC(=O)N=NC(=O)OCC +CCOC(=O)NN +CCOC(=O)OCC +CCOC(=O)c1ccc(I)cc1 +CCOC(=O)c1ccc(N)cc1 +CCOC(=O)c1ccc(O)cc1 +CCOC(=O)c1cccc(N)c1 +CCOC(=O)c1cn[nH]c1 +CCOC(=S)[S-] +CCOC(C)(OCC)OCC +CCOC(C)=O +CCOC(CBr)OCC +CCOC(CN)OCC +CCOC(OCC)OCC +CCOC([O-])[O-] +CCOC1(O[Si](C)(C)C)CC1 +CCOC=C(C(=O)OCC)C(=O)OCC +CCOC=O +CCOCC +CCOCCO +CCON +CCOOCC +CCOP(=O)(C#N)OCC +CCOP(=O)(CC#N)OCC +CCOP(=O)(Cl)OCC +CCOP(=O)(OCC)On1nnc2ccccc2c1=O +CCOP(OCC)OCC +CCOP([O-])OCC +CCOS(=O)(=O)OCC +CCO[Na] +CCO[SiH](OCC)OCC +CCO[Si](CCCN)(OCC)OCC +CCO[Si](OCC)(OCC)OCC +CCS(=O)(=O)Cl +CC[Mg+] +CC[Mg]Br +CC[Mg]Cl +CC[N+](=O)[O-] +CC[N+](CC)(CC)Cc1ccccc1 +CC[N+](CC)(CC)S(=O)(=O)N=C([O-])OC +CC[O+](CC)CC +CC[O-] +CC[S-] +CC[SiH](CC)CC +CC[Si](Cl)(CC)CC +CC[Zn]CC +CCc1ccccc1 +CN(C)C +CN(C)C(=N)N(C)C +CN(C)C(=O)CCl +CN(C)C(=O)Cl +CN(C)C(=O)N=NC(=O)N(C)C +CN(C)C(=S)Cl +CN(C)C(N(C)C)=[N+]1N=[N+]([O-])c2ncccc21 +CN(C)C(OC(C)(C)C)N(C)C +CN(C)C(OC(C)(C)C)OC(C)(C)C 
+CN(C)C(ON1C(=O)CCC1=O)=[N+](C)C +CN(C)C(On1nnc2ccccc21)=[N+](C)C +CN(C)C(On1nnc2cccnc21)=[N+](C)C +CN(C)C=O +CN(C)CC(=O)O +CN(C)CCCCl +CN(C)CCCN +CN(C)CCCl +CN(C)CCN +CN(C)CCN(C)C +CN(C)CCO +CN(C)N +CN(C)P(=O)(N(C)C)N(C)C +CN(C)S(=O)(=O)Cl +CN(C)[C@H]1CCNC1 +CN(C)[P+](On1nnc2ccccc21)(N(C)C)N(C)C +CN(C)c1ccc(C=O)cc1 +CN(C)c1ccc(P(C(C)(C)C)C(C)(C)C)cc1 +CN(C)c1cccc2cccc(N(C)C)c12 +CN(C)c1ccccc1 +CN(C)c1ccccc1-c1ccccc1P(C1CCCCC1)C1CCCCC1 +CN(C)c1ccccn1 +CN(C)c1ccncc1 +CN(C1CCCCC1)C1CCCCC1 +CN1C(=O)CC(=O)N(C)C1=O +CN1CCC(=O)CC1 +CN1CCC(N)CC1 +CN1CCC(O)CC1 +CN1CCCC1 +CN1CCCC1=O +CN1CCCN(C)C1=O +CN1CCCNCC1 +CN1CCN(C)C1=O +CN1CCN(c2ccc(N)cc2)CC1 +CN1CCNCC1 +CN1CCOCC1 +CN=C=O +CN=C=S +CNCC(=O)O +CNCCN(C)C +CNCCNC +CNCCO +CNCCOC +CNCc1ccccc1 +CNOC +CN[C@@H]1CCCC[C@H]1NC +CNc1ccccc1 +COB(OC)OC +COC(=O)/C=C(/C)N +COC(=O)C#CC(=O)OC +COC(=O)C(=O)Cl +COC(=O)C(=O)OC +COC(=O)C(C)Br +COC(=O)C(C)C +COC(=O)C=P(c1ccccc1)(c1ccccc1)c1ccccc1 +COC(=O)CBr +COC(=O)CC#N +COC(=O)CC(=O)OC +COC(=O)CC(C)=O +COC(=O)CCS +COC(=O)CCl +COC(=O)CN +COC(=O)CP(=O)(OC)OC +COC(=O)CS +COC(=O)Cc1ccc(O)cc1 +COC(=O)Cl +COC(=O)N[C@@H](C(=O)O)c1ccccc1 +COC(=O)N[C@H](C(=O)O)C(C)C +COC(=O)OC +COC(=O)c1ccc(B(O)O)cc1 +COC(=O)c1ccc(Br)cc1 +COC(=O)c1ccc(C=O)cc1 +COC(=O)c1ccc(CBr)c(F)c1 +COC(=O)c1ccc(CBr)cc1 +COC(=O)c1ccc(Cl)nc1 +COC(=O)c1ccc(N)cc1 +COC(=O)c1ccc(O)cc1 +COC(=O)c1cccc(CBr)c1 +COC(=O)c1cccc(N)c1 +COC(=O)c1cccc(O)c1 +COC(=O)c1ccccc1 +COC(=O)c1ccccc1N +COC(=O)c1ccccc1S +COC(=O)c1sccc1N +COC(C)(C)C +COC(C)(C)OC +COC(C)(OC)N(C)C +COC(C)(OC)OC +COC(C)=O +COC(CBr)OC +COC(CN)OC +COC(Cl)Cl +COC(OC)N(C)C +COC(OC)OC +COC1CCC(OC)O1 +COC1CCCC1 +COC=O +COCC(=O)Cl +COCC(=O)O +COCC(C)O +COCCBr +COCCCBr +COCCCN +COCCN +COCCN(CCOC)S(F)(F)F +COCCO +COCCOC +COCCOCCOC +COCCOCCl +COCCO[Al+]OCCOC +COCCO[AlH2-]OCCOC +COCCl +COCN(Cc1ccccc1)C[Si](C)(C)C +COC[P+](c1ccccc1)(c1ccccc1)c1ccccc1 +CON=C1C[C@@H](C(=O)O)N(C(=O)OC(C)(C)C)C1 +COP(C)(=O)OC +COP(OC)OC +COS(=O)(=O)C(F)(F)F +COS(=O)(=O)OC +COS(=O)(=O)c1ccc(C)cc1 +CO[Na] 
+COc1cc(C(=O)O)ccc1-c1cc2nccc(-c3ccc(OC4CCOCC4)c(C#N)c3)c2o1 +COc1cc(C=O)cc(OC)c1OC +COc1cc(C=O)ccc1O +COc1cc(N)c(Cl)cc1C(=O)O +COc1cc(N)cc(OC)c1 +COc1cc(N)cc(OC)c1OC +COc1cc(N)ccc1-n1cnc(C)c1 +COc1cc2nccc(Cl)c2cc1OC +COc1cc2nccc(Oc3ccc(N)cc3)c2cc1OC +COc1cc2ncnc(Cl)c2cc1OC +COc1ccc(B(O)O)cc1 +COc1ccc(B(O)O)cc1OC +COc1ccc(B(O)O)cn1 +COc1ccc(Br)cc1 +COc1ccc(Br)cn1 +COc1ccc(C(=O)CBr)cc1 +COc1ccc(C(=O)Cl)cc1 +COc1ccc(C(=O)Cl)cc1OC +COc1ccc(C(Cl)(c2ccccc2)c2ccc(OC)cc2)cc1 +COc1ccc(C=O)cc1 +COc1ccc(C=O)cc1O +COc1ccc(C=O)cc1OC +COc1ccc(CBr)cc1 +COc1ccc(CCN)cc1OC +COc1ccc(CCl)cc1 +COc1ccc(CN)c(OC)c1 +COc1ccc(CN)cc1 +COc1ccc(CO)cc1 +COc1ccc(C[C@H]2NC(=O)n3c2nc2ccccc23)cc1 +COc1ccc(Cn2cc3c(n2)c(Cl)nc2ccc(OC)cc23)cc1 +COc1ccc(I)cc1 +COc1ccc(N)cc1 +COc1ccc(N)cc1OC +COc1ccc(N)cn1 +COc1ccc(O)cc1 +COc1ccc(OC)c(P(C2CCCCC2)C2CCCCC2)c1-c1c(C(C)C)cc(C(C)C)cc1C(C)C +COc1ccc(P2(=S)SP(=S)(c3ccc(OC)cc3)S2)cc1 +COc1ccc(S(=O)(=O)Cl)cc1 +COc1ccc(S)cc1 +COc1ccc2c3c1O[C@H]1C(=O)CC[C@@]4(O)[C@@H](C2)N(C)CC[C@]314 +COc1ccc2nc(N)sc2c1 +COc1cccc(B(O)O)c1 +COc1cccc(Br)c1 +COc1cccc(C(=O)Cl)c1 +COc1cccc(C=O)c1 +COc1cccc(N)c1 +COc1cccc(O)c1 +COc1cccc(OC)c1-c1ccccc1P(C1CCCCC1)C1CCCCC1 +COc1ccccc1 +COc1ccccc1B(O)O +COc1ccccc1C=O +COc1ccccc1CN +COc1ccccc1N +COc1ccccc1N1CCNCC1 +COc1ccccc1O +COc1nc(Cl)nc(OC)n1 +COc1nc(OC)nc([N+]2(C)CCOCC2)n1 +CP(C)C +CS(=O)(=O)Cl +CS(=O)(=O)N1CCNCC1 +CS(=O)(=O)O +CS(=O)(=O)OS(C)(=O)=O +CS(=O)(=O)[O-] +CS(=O)(=O)c1ccc(B(O)O)cc1 +CS(=O)(=O)c1cccc(B(O)O)c1 +CS(=O)[O-] +CS(C)=O +CS(N)(=O)=O +CSCC[C@H](N)C(=O)O +CSSC +CSc1ccccc1 +C[Al](C)C +C[C@@H](C(=O)O)N(C)C(=O)OC(C)(C)C +C[C@@H](N)CO +C[C@@H](N)c1ccccc1 +C[C@@H](NC(=O)OC(C)(C)C)C(=O)O +C[C@H](N)C(=O)O +C[C@H](N)CO +C[C@H](N)c1ccccc1 +C[C@H](NC(=O)OC(C)(C)C)C(=O)O +C[C@H](O)C(=O)O +C[C@H](O)CN +C[C@H]1CNC[C@@H](C)N1 +C[C@H]1CNC[C@@H](C)O1 +C[Mg+] +C[Mg]Br +C[Mg]Cl +C[Mg]I +C[N+](=O)[O-] +C[N+](C)(C)Cc1ccccc1 +C[N+](C)(C)c1ccccc1 +C[N+](C)=CCl +C[N+]1([O-])CCOCC1 +C[O+](C)C +C[O-] +C[P+](c1ccccc1)(c1ccccc1)c1ccccc1 +C[Pd] 
+C[S+](C)(C)=O +C[S+](C)C +C[S-] +C[Si](C)(C)Br +C[Si](C)(C)C#N +C[Si](C)(C)C(C(N)=O)[Si](C)(C)C +C[Si](C)(C)C(F)(F)F +C[Si](C)(C)C=[N+]=[N-] +C[Si](C)(C)CC(N)=O +C[Si](C)(C)CCO +C[Si](C)(C)CCOCCl +C[Si](C)(C)Cl +C[Si](C)(C)I +C[Si](C)(C)N=C=O +C[Si](C)(C)N=[N+]=[N-] +C[Si](C)(C)N[Si](C)(C)C +C[Si](C)(C)OS(=O)(=O)C(F)(F)F +C[Si](C)(C)[N-][Si](C)(C)C +C[Si](C)(C)[O-] +C[Si](C)(Cl)Cl +C[Sn](C)(C)C +C[Sn](C)(C)Cl +C[Zn]C +C[n+]1ccccc1Cl +Cc1c(CCO)sc[n+]1Cc1ccccc1 +Cc1c[nH]cn1 +Cc1cc(C(=O)O)ccc1N1C(=O)CSC1c1ccc(F)cc1 +Cc1cc(C(C)(C)C)c(O)c(C(C)(C)C)c1 +Cc1cc(C(C)(C)C)nc(C(C)(C)C)c1 +Cc1cc(C)c(CN)c(=O)[nH]1 +Cc1cc(C)c(CN)c(O)n1 +Cc1cc(C)c(N2CCN(c3c(C)cc(C)cc3C)C2=[Ru](Cl)(Cl)(=Cc2ccccc2)[P](C2CCCCC2)(C2CCCCC2)C2CCCCC2)c(C)c1 +Cc1cc(C)c(N2CCN(c3c(C)cc(C)cc3C)C2=[Ru](Cl)(Cl)=Cc2ccccc2OC(C)C)c(C)c1 +Cc1cc(C)c(S(=O)(=O)ON)c(C)c1 +Cc1cc(C)cc(C)c1 +Cc1cc(C)nc(C)c1 +Cc1cc(N)n[nH]1 +Cc1ccc(B(O)O)cc1 +Cc1ccc(Br)cc1 +Cc1ccc(Br)nc1 +Cc1ccc(C(=O)Cl)cc1 +Cc1ccc(C)cc1 +Cc1ccc(C=O)cc1 +Cc1ccc(CBr)cc1 +Cc1ccc(N)cc1 +Cc1ccc(N)nc1 +Cc1ccc(O)cc1 +Cc1ccc(S(=O)(=O)Cl)cc1 +Cc1ccc(S(=O)(=O)NN)cc1 +Cc1ccc(S(=O)(=O)O)cc1 +Cc1ccc(S(=O)(=O)OS(=O)(=O)c2ccc(C)cc2)cc1 +Cc1ccc(S(=O)(=O)[O-])cc1 +Cc1cccc(B(O)O)c1 +Cc1cccc(C)c1 +Cc1cccc(C)c1N +Cc1cccc(C)n1 +Cc1cccc(N)c1 +Cc1cccc(N)n1 +Cc1cccc(N=C=O)c1 +Cc1cccc(O)c1 +Cc1ccccc1 +Cc1ccccc1B(O)O +Cc1ccccc1C +Cc1ccccc1C(=O)Cl +Cc1ccccc1C=O +Cc1ccccc1N +Cc1ccccc1O +Cc1ccccc1P(c1ccccc1C)c1ccccc1C +Cc1ccccc1S(=O)(=O)Cl +Cc1ccccc1S(=O)(=O)O +Cc1ccccn1 +Cc1ccnc(N)c1 +Cc1ccncc1 +Cc1csc(N)n1 +Cc1ncc[nH]1 +Cc1noc(C)c1B(O)O +ClB(Cl)Cl +ClC(Cl)(Cl)C(Cl)(Cl)Cl +ClC(Cl)(Cl)Cl +ClC(Cl)Cl +ClC(c1ccccc1)(c1ccccc1)c1ccccc1 +ClCC1CO1 +ClCCBr +ClCCCBr +ClCCCCBr +ClCCCI +ClCCCl +ClCCN1CCCC1 +ClCCN1CCCCC1 +ClCCN1CCOCC1 +ClCCNCCCl +ClCCl +ClCI +ClCOCc1ccccc1 +ClC[C@H]1CO1 +ClCc1ccc(Cl)cc1 +ClCc1ccc(Cl)nc1 +ClCc1ccccc1 +ClCc1ccccn1 +ClCc1cccnc1 +ClCc1ccncc1 +ClP(Cl)(Cl)(Cl)Cl +ClP(Cl)Cl +ClP(c1ccccc1)c1ccccc1 +Cl[Al](Cl)Cl +Cl[Cu] +Cl[Cu]Cl +Cl[Fe](Cl)Cl +Cl[Hg]Cl 
+Cl[Mg]Cc1ccccc1 +Cl[Mg]c1ccccc1 +Cl[Ni]1(Cl)[P](c2ccccc2)(c2ccccc2)CCC[P]1(c1ccccc1)c1ccccc1 +Cl[Ni]Cl +Cl[Pd+] +Cl[Pd](Cl)([P](c1ccccc1)(c1ccccc1)c1ccccc1)[P](c1ccccc1)(c1ccccc1)c1ccccc1 +Cl[Pd]Cl +Cl[Ru](Cl)(=Cc1ccccc1)([P](C1CCCCC1)(C1CCCCC1)C1CCCCC1)[P](C1CCCCC1)(C1CCCCC1)C1CCCCC1 +Cl[SiH](Cl)Cl +Cl[Sn](Cl)(Cl)Cl +Cl[Sn]Cl +Cl[Ti](Cl)(Cl)Cl +Cl[Zn]Cl +Clc1cc(Cl)nc(Cl)n1 +Clc1cc(Cl)ncn1 +Clc1ccc(Br)cc1 +Clc1ccc(CBr)cc1 +Clc1ccc(Cl)nn1 +Clc1ccc(I)cc1 +Clc1ccc([Mg]Br)cc1 +Clc1ccc2ccccc2n1 +Clc1cccc(CBr)c1 +Clc1ccccc1 +Clc1ccccc1CBr +Clc1ccccc1Cl +Clc1ccccn1 +Clc1cccnc1Cl +Clc1ccnc(Cl)n1 +Clc1ccnc2ccccc12 +Clc1cncc(Cl)n1 +Clc1cnccn1 +Clc1nc(-c2ccccc2)nc(-c2ccccc2)n1 +Clc1nc(Cl)c2ccccc2n1 +Clc1nc(Cl)nc(Cl)n1 +Clc1nc2ccccc2[nH]1 +Clc1nc2ccccc2o1 +Clc1ncc(Br)c(Cl)n1 +Clc1ncc(Br)cn1 +Clc1ncc(Cl)c(Cl)n1 +Clc1ncccn1 +Clc1nccnc1Cl +Clc1ncnc2[nH]ccc12 +Clc1ncnc2nc[nH]c12 +Cn1cc(B2OC(C)(C)C(C)(C)O2)cn1 +Cn1ccc(N)n1 +Cn1ccc2ccc(-c3cc(Cl)cc4nccnc34)cc21 +Cn1ccnc1 +FB(F)F +FC(F)(F)CI +FC(F)(F)I +FC(F)(F)c1ccc(Br)cc1 +FC(F)(F)c1ccc(CBr)cc1 +FC(F)(F)c1ccc(Cl)nc1 +FC(F)(F)c1ccccc1 +FC(F)(F)c1cnc(Cl)nc1Cl +FC(F)Cl +FC1(F)CNC1 +FCCBr +F[B-](F)(F)F +F[N+]12CC[N+](CCl)(CC1)CC2 +F[P-](F)(F)(F)(F)F +Fc1cc(F)cc(Br)c1 +Fc1ccc(Br)cc1 +Fc1ccc(Br)cn1 +Fc1ccc(Br)nc1 +Fc1ccc(CBr)cc1 +Fc1ccc(CCl)cc1 +Fc1ccc(I)cc1 +Fc1ccc(N2CCNCC2)cc1 +Fc1ccc(S)cc1 +Fc1ccc([Mg]Br)cc1 +Fc1ccc2c(C3CCNCC3)noc2c1 +Fc1cccc(CBr)c1 +Fc1ccccc1 +Fc1ccccc1CBr +Fc1ccccn1 +Fc1cnc(Cl)nc1Cl +I[Cu]I +Ic1ccccc1 +N#CBr +N#CC(Cl)(Cl)Cl +N#CC1=C(C#N)C(=O)C(Cl)=C(Cl)C1=O +N#CCBr +N#CCC#N +N#CCC(=O)O +N#CCC(N)=O +N#CCC(N)=S +N#CCCCBr +N#CCCl +N#CCc1ccccc1 +N#CN +N#C[Cu] +N#C[Cu]C#N +N#C[O-] +N#C[S-] +N#C[Zn]C#N +N#Cc1ccc(B(O)O)cc1 +N#Cc1ccc(Br)cc1 +N#Cc1ccc(C(=O)Cl)cc1 +N#Cc1ccc(C=O)cc1 +N#Cc1ccc(CBr)cc1 +N#Cc1ccc(Cl)nc1 +N#Cc1ccc(F)cc1 +N#Cc1ccc(N)cc1 +N#Cc1ccc(O)cc1 +N#Cc1cccc(B(O)O)c1 +N#Cc1cccc(Br)c1 +N#Cc1cccc(CBr)c1 +N#Cc1cccc(N)c1 +N#Cc1cccc(O)c1 +N#Cc1ccccc1 +N#Cc1ccccc1CBr +N#Cc1ccccc1F +N#Cc1ccccc1N +N#Cc1ccccn1 
+N#Cc1cccnc1Cl +N=C(N)N +N=C(N)NCCC[C@H](N)C(=O)O +N=C(N)NN +N=C(c1ccccc1)c1ccccc1 +N=C=N +N=CN +NC(=O)CBr +NC(=O)CCl +NC(=O)CI +NC(=O)[O-] +NC(=O)c1ccccc1 +NC(CO)(CO)CO +NC(CO)CO +NC(N)=O +NC(N)=S +NC(c1ccccc1)c1ccccc1 +NC12CC3CC(CC(C3)C1)C2 +NC1CC1 +NC1CCC1 +NC1CCCC1 +NC1CCCCC1 +NC1CCN(Cc2ccccc2)CC1 +NC1CCOCC1 +NC=O +NCC(=O)O +NCC(F)(F)F +NCC(N)=O +NCC(O)CO +NCC1CC1 +NCC1CCCCC1 +NCC1CCOCC1 +NCCC(=O)O +NCCCCCC(=O)O +NCCCCCCN +NCCCCO +NCCCC[C@H](N)C(=O)O +NCCCN +NCCCN1CCOCC1 +NCCCO +NCCCc1ccccc1 +NCCCn1ccnc1 +NCCN +NCCN1CCCC1 +NCCN1CCCCC1 +NCCN1CCOCC1 +NCCNCCN +NCCO +NCCOCCO +NCCS +NCCc1[c-]cccc1 +NCCc1c[nH]c2ccccc12 +NCCc1ccc(O)cc1 +NCCc1ccccc1 +NCCc1ccccn1 +NCc1ccc(C(F)(F)F)cc1 +NCc1ccc(Cl)cc1 +NCc1ccc(F)cc1 +NCc1ccccc1 +NCc1ccccn1 +NCc1cccnc1 +NCc1ccco1 +NCc1cccs1 +NCc1ccncc1 +NNC(=O)c1ccccc1 +NNC(N)=O +NNC(N)=S +NNC=O +NNc1ccccc1 +NNc1ccccn1 +NOC1CCCCO1 +NOCc1ccccc1 +NOS(=O)(=O)O +NS(=O)(=O)C1CC1 +NS(=O)(=O)Cl +NS(=O)(=O)O +NS(=O)(=O)c1ccccc1 +NS(N)(=O)=O +N[C@@H](CC(=O)O)C(=O)O +N[C@@H](CCC(=O)O)C(=O)O +N[C@@H](CS)C(=O)O +N[C@@H](Cc1ccc(O)cc1)C(=O)O +N[C@@H](Cc1ccc2ccccc2c1)C(=O)O +N[C@@H](Cc1ccccc1)C(=O)O +N[C@@H]1CCCC[C@H]1N +N[C@H]1CC[C@H](N)CC1 +N[C@H]1CC[C@H](O)CC1 +Nc1c(F)c(N)c(F)c(F)c1F +Nc1cc(Cl)cc(Cl)c1 +Nc1cc[nH]n1 +Nc1ccc(/C=C/C(=O)O)cn1 +Nc1ccc(Br)cc1 +Nc1ccc(Br)cc1F +Nc1ccc(Br)cc1N +Nc1ccc(Br)cn1 +Nc1ccc(C(=O)O)cc1 +Nc1ccc(C(F)(F)F)cc1 +Nc1ccc(Cl)c(Cl)c1 +Nc1ccc(Cl)cc1 +Nc1ccc(Cl)cn1 +Nc1ccc(Cl)nn1 +Nc1ccc(F)c(Cl)c1 +Nc1ccc(F)c(F)c1 +Nc1ccc(F)cc1 +Nc1ccc(F)cc1F +Nc1ccc(F)cn1 +Nc1ccc(I)cc1 +Nc1ccc(I)cc1F +Nc1ccc(N)cc1 +Nc1ccc(N2CCOCC2)cc1 +Nc1ccc(O)cc1 +Nc1ccc(OC(F)(F)F)cc1 +Nc1ccc(S(N)(=O)=O)cc1 +Nc1ccc([N+](=O)[O-])cc1 +Nc1ccc2[nH]ncc2c1 +Nc1cccc(B(O)O)c1 +Nc1cccc(Br)c1 +Nc1cccc(Br)n1 +Nc1cccc(C(=O)O)c1 +Nc1cccc(C(F)(F)F)c1 +Nc1cccc(Cl)c1 +Nc1cccc(F)c1 +Nc1cccc(N)c1 +Nc1cccc(O)c1 +Nc1cccc([N+](=O)[O-])c1 +Nc1ccccc1 +Nc1ccccc1Br +Nc1ccccc1C(=O)O +Nc1ccccc1Cl +Nc1ccccc1F +Nc1ccccc1N +Nc1ccccc1O +Nc1ccccc1S +Nc1ccccn1 +Nc1cccnc1 +Nc1ccncc1 
+Nc1ccncn1 +Nc1ccon1 +Nc1cnc(Br)cn1 +Nc1cncc(Br)c1 +Nc1cnccn1 +Nc1nc(Cl)cc(Cl)n1 +Nc1ncc(Br)nc1Br +Nc1ncccc1C(=O)O +Nc1ncccn1 +Nc1nccs1 +Nc1ncnc(Cl)c1Cl +Nc1ncnc2[nH]nc(I)c12 +Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2COP(=O)(O)O[C@H]2[C@H]1O +Nc1nnn[nH]1 +O=C(/C=C/c1ccccc1)/C=C/c1ccccc1 +O=C(Br)CBr +O=C(C=Cc1ccccc1)C=Cc1ccccc1 +O=C(CBr)OCc1ccccc1 +O=C(CBr)c1ccc(Br)cc1 +O=C(CBr)c1ccc(Cl)cc1 +O=C(CBr)c1ccc(F)cc1 +O=C(CBr)c1ccccc1 +O=C(CCl)CCl +O=C(Cl)C(=O)Cl +O=C(Cl)C(Cl)(Cl)Cl +O=C(Cl)C1CC1 +O=C(Cl)C1CCC1 +O=C(Cl)C1CCCC1 +O=C(Cl)C1CCCCC1 +O=C(Cl)CBr +O=C(Cl)CCCCl +O=C(Cl)CCCl +O=C(Cl)CCc1ccccc1 +O=C(Cl)CCl +O=C(Cl)Cc1ccccc1 +O=C(Cl)Cl +O=C(Cl)N1CCOCC1 +O=C(Cl)OC(Cl)(Cl)Cl +O=C(Cl)OCC(Cl)(Cl)Cl +O=C(Cl)OCC1c2ccccc2-c2ccccc21 +O=C(Cl)OCCl +O=C(Cl)OCc1ccccc1 +O=C(Cl)Oc1ccc([N+](=O)[O-])cc1 +O=C(Cl)Oc1ccccc1 +O=C(Cl)c1c(Cl)cccc1Cl +O=C(Cl)c1c(F)cccc1F +O=C(Cl)c1ccc(-c2ccccc2)cc1 +O=C(Cl)c1ccc(Br)cc1 +O=C(Cl)c1ccc(C(F)(F)F)cc1 +O=C(Cl)c1ccc(Cl)cc1 +O=C(Cl)c1ccc(Cl)cc1Cl +O=C(Cl)c1ccc(Cl)nc1 +O=C(Cl)c1ccc(F)cc1 +O=C(Cl)c1ccc([N+](=O)[O-])cc1 +O=C(Cl)c1ccc2ccccc2c1 +O=C(Cl)c1cccc(C(F)(F)F)c1 +O=C(Cl)c1cccc(Cl)c1 +O=C(Cl)c1cccc([N+](=O)[O-])c1 +O=C(Cl)c1ccccc1 +O=C(Cl)c1ccccc1C(F)(F)F +O=C(Cl)c1ccccc1Cl +O=C(Cl)c1ccccc1F +O=C(Cl)c1cccnc1 +O=C(Cl)c1ccco1 +O=C(Cl)c1cccs1 +O=C(N=C=S)c1ccccc1 +O=C(N=NC(=O)N1CCCCC1)N1CCCCC1 +O=C(O)/C=C/C(=O)O +O=C(O)/C=C\\C(=O)O +O=C(O)C(=O)O +O=C(O)C(Cl)(Cl)Cl +O=C(O)C(Cl)Cl +O=C(O)C(F)(F)F +O=C(O)C(O)C(O)C(=O)O +O=C(O)C1CC1 +O=C(O)C1CCCCC1 +O=C(O)C1CCNCC1 +O=C(O)C1CCOCC1 +O=C(O)C1CNC1 +O=C(O)CBr +O=C(O)CC(=O)O +O=C(O)CC(O)(CC(=O)O)C(=O)O +O=C(O)CC1CC1 +O=C(O)CCC(=O)O +O=C(O)CCCCC(=O)O +O=C(O)CCS +O=C(O)CCl +O=C(O)CN(CCN(CC(=O)O)CC(=O)O)CC(=O)O +O=C(O)CNC(=O)OCc1ccccc1 +O=C(O)CO +O=C(O)CS +O=C(O)Cc1ccccc1 +O=C(O)O +O=C(O)[C@@H]1CCCN1 +O=C(O)[C@@H]1CC[C@@H]2CN1C(=O)N2OCc1ccccc1 +O=C(O)[C@H](O)[C@@H](O)C(=O)O +O=C(O)c1cc2ccc(Br)cn2n1 +O=C(O)c1cc2ccc(Cl)cn2n1 +O=C(O)c1cc2ncc(Br)cn2n1 +O=C(O)c1cc2ncc(Cl)cn2n1 +O=C(O)c1ccc(B(O)O)cc1 +O=C(O)c1ccc(Br)cc1 
+O=C(O)c1ccc(C(=O)O)cc1 +O=C(O)c1ccc(Cl)cc1 +O=C(O)c1ccc(Cl)nc1 +O=C(O)c1ccc(F)cc1 +O=C(O)c1ccc(O)cc1 +O=C(O)c1ccc([N+](=O)[O-])cc1 +O=C(O)c1cccc(-c2noc(C(F)(F)F)n2)c1 +O=C(O)c1cccc(B(O)O)c1 +O=C(O)c1ccccc1 +O=C(O)c1ccccc1O +O=C(O)c1ccccn1 +O=C(O)c1cccnc1 +O=C(O)c1cccnc1Cl +O=C(O)c1cccs1 +O=C(O)c1ccncc1 +O=C(O)c1cnccn1 +O=C(OC(=O)C(F)(F)F)C(F)(F)F +O=C(OC(=O)C(F)F)C(F)F +O=C(OC(=O)c1ccccc1)c1ccccc1 +O=C(OC(Cl)(Cl)Cl)OC(Cl)(Cl)Cl +O=C(OCC1c2ccccc2-c2ccccc21)ON1C(=O)CCC1=O +O=C(OCc1ccccc1)N1CCNCC1 +O=C(OCc1ccccc1)ON1C(=O)CCC1=O +O=C(ON1C(=O)CCC1=O)ON1C(=O)CCC1=O +O=C(OO)c1cccc(Cl)c1 +O=C(OOC(=O)c1ccccc1)c1ccccc1 +O=C([O-])C(F)(F)Cl +O=C([O-])C(F)(F)F +O=C([O-])C(O)C(O)C(=O)[O-] +O=C([O-])Cl +O=C([O-])O +O=C([O-])[C@H](O)[C@@H](O)C(=O)[O-] +O=C([O-])[O-] +O=C(c1ccccc1)c1ccccc1 +O=C(c1ncc[nH]1)c1ncc[nH]1 +O=C(n1ccnc1)n1ccnc1 +O=C1C(Cl)=C(Cl)C(=O)C(Cl)=C1Cl +O=C1C=CC(=O)C=C1 +O=C1C=CC(=O)N1 +O=C1C=CC(=O)O1 +O=C1C=CCC1 +O=C1C=CCCC1 +O=C1CCC(=O)N1 +O=C1CCC(=O)N1Br +O=C1CCC(=O)N1Cl +O=C1CCC(=O)N1I +O=C1CCC(=O)N1O +O=C1CCC(=O)O1 +O=C1CCC1 +O=C1CCC2(CC1)OCCO2 +O=C1CCCC(=O)C1 +O=C1CCCC(=O)O1 +O=C1CCCC1 +O=C1CCCCC1 +O=C1CCCCCC1 +O=C1CCCCCN1 +O=C1CCCCCO1 +O=C1CCCCN1 +O=C1CCCN1 +O=C1CCCO1 +O=C1CCCc2ccccc21 +O=C1CCN(C(=O)OCc2ccccc2)CC1 +O=C1CCN(Cc2ccccc2)CC1 +O=C1CCNCC1 +O=C1CCOCC1 +O=C1CC[C@@H](C(=O)O)N1 +O=C1CCc2ccccc21 +O=C1CNC(=O)N1 +O=C1CNCCN1 +O=C1COC1 +O=C1CSC(=O)N1 +O=C1CSC(=S)N1 +O=C1Cc2cc(F)ccc2N1 +O=C1Cc2ccccc2C(=O)O1 +O=C1Cc2ccccc2N1 +O=C1NC(=O)c2ccccc21 +O=C1NCCO1 +O=C1Nc2ccccc2C1=O +O=C1OC(=O)c2ccccc21 +O=C1OCCN1P(=O)(Cl)N1CCOC1=O +O=C1OCCO1 +O=C1O[C@H]([C@@H](O)CO)C(O)=C1O +O=C1O[C@H]([C@@H](O)CO)C([O-])=C1O +O=C1c2ccccc2C(=O)C1(O)O +O=C1c2ccccc2C(=O)N1CCBr +O=C1c2ccccc2C(=O)N1CCCBr +O=C1c2ccccc2C(=O)N1O +O=C=NC(=O)C(Cl)(Cl)Cl +O=C=NC1CCCCC1 +O=C=NCCCl +O=C=NCc1ccccc1 +O=C=NS(=O)(=O)Cl +O=C=Nc1ccc(C(F)(F)F)cc1 +O=C=Nc1ccc(Cl)c(C(F)(F)F)c1 +O=C=Nc1ccc(Cl)cc1 +O=C=Nc1ccc(F)cc1 +O=C=Nc1cccc(C(F)(F)F)c1 +O=C=Nc1ccccc1 +O=C=O +O=CC(=O)O +O=CC1CC1 +O=CC1CCCC1 
+O=CC1CCCCC1 +O=CC=O +O=CCCc1ccccc1 +O=CCCl +O=CCc1ccccc1 +O=CO +O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO +O=C[O-] +O=Cc1c[nH]c2ccccc12 +O=Cc1cc(Br)ccc1F +O=Cc1cc(Br)ccc1O +O=Cc1ccc(B(O)O)cc1 +O=Cc1ccc(Br)cc1 +O=Cc1ccc(C(=O)O)cc1 +O=Cc1ccc(C(F)(F)F)cc1 +O=Cc1ccc(Cl)c(Cl)c1 +O=Cc1ccc(Cl)cc1 +O=Cc1ccc(F)cc1 +O=Cc1ccc(O)c(O)c1 +O=Cc1ccc(O)cc1 +O=Cc1ccc([N+](=O)[O-])cc1 +O=Cc1ccc2c(c1)OCO2 +O=Cc1cccc(B(O)O)c1 +O=Cc1cccc(Br)c1 +O=Cc1cccc(Br)n1 +O=Cc1cccc(Cl)c1 +O=Cc1cccc(F)c1 +O=Cc1cccc(O)c1 +O=Cc1cccc([N+](=O)[O-])c1 +O=Cc1cccc2ccccc12 +O=Cc1ccccc1 +O=Cc1ccccc1Br +O=Cc1ccccc1Cl +O=Cc1ccccc1F +O=Cc1ccccc1O +O=Cc1ccccc1[N+](=O)[O-] +O=Cc1ccccn1 +O=Cc1cccnc1 +O=Cc1ccco1 +O=Cc1cccs1 +O=Cc1ccncc1 +O=Cc1ccsc1 +O=Cc1ncc[nH]1 +O=N[O-] +O=P(Br)(Br)Br +O=P(Cl)(Cl)Cl +O=P(Cl)(Cl)Oc1ccccc1 +O=P(Cl)(c1ccccc1)c1ccccc1 +O=P(O)(O)O +O=P([O-])(O)O +O=P([O-])([O-])O +O=P([O-])([O-])[O-] +O=P(c1ccccc1)(c1ccccc1)c1ccccc1 +O=P12OP3(=O)OP(=O)(O1)OP(=O)(O2)O3 +O=S(=O)(Cl)C(F)(F)F +O=S(=O)(Cl)C1CC1 +O=S(=O)(Cl)CCCCl +O=S(=O)(Cl)CCCl +O=S(=O)(Cl)Cc1ccccc1 +O=S(=O)(Cl)Cl +O=S(=O)(Cl)c1ccc(Br)cc1 +O=S(=O)(Cl)c1ccc(C(F)(F)F)cc1 +O=S(=O)(Cl)c1ccc(Cl)cc1 +O=S(=O)(Cl)c1ccc(F)cc1 +O=S(=O)(Cl)c1ccc2ccccc2c1 +O=S(=O)(Cl)c1ccccc1 +O=S(=O)(Cl)c1cccs1 +O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F +O=S(=O)(O)C(F)(F)F +O=S(=O)(O)Cl +O=S(=O)(O)O +O=S(=O)(O)c1ccccc1 +O=S(=O)(OCC(F)(F)F)C(F)(F)F +O=S(=O)(OCC(F)F)C(F)(F)F +O=S(=O)(OS(=O)(=O)C(F)(F)F)C(F)(F)F +O=S(=O)([O-])C(F)(F)F +O=S(=O)([O-])O +O=S(=O)([O-])OOS(=O)(=O)[O-] +O=S(=O)([O-])O[O-] +O=S(=O)([O-])[O-] +O=S(=O)(c1ccccc1)N(F)S(=O)(=O)c1ccccc1 +O=S(Cl)Cl +O=S([O-])([O-])=S +O=S([O-])O +O=S([O-])OO +O=S([O-])S(=O)(=O)[O-] +O=S([O-])S(=O)[O-] +O=S([O-])[O-] +O=S([O-])c1ccccc1 +O=S1(=O)CCCC1 +O=S1(=O)CCCO1 +O=S1(=O)CCNCC1 +O=S=O +O=[Ag] +O=[Cr](=O)([O-])Cl +O=[Cr](=O)([O-])O[Cr](=O)(=O)[O-] +O=[Cr](=O)=O +O=[Cu-] +O=[Cu] +O=[Mn](=O)(=O)[O-] +O=[Mn]=O +O=[N+]([O-])O +O=[N+]([O-])[O-] +O=[N+]([O-])c1cc(Br)ccc1F +O=[N+]([O-])c1ccc(Br)cc1F 
+O=[N+]([O-])c1ccc(Br)cn1 +O=[N+]([O-])c1ccc(CBr)cc1 +O=[N+]([O-])c1ccc(Cl)cc1 +O=[N+]([O-])c1ccc(Cl)nc1 +O=[N+]([O-])c1ccc(Cl)nc1Cl +O=[N+]([O-])c1ccc(F)c(Cl)c1 +O=[N+]([O-])c1ccc(F)c(F)c1 +O=[N+]([O-])c1ccc(F)cc1 +O=[N+]([O-])c1ccc(F)cc1F +O=[N+]([O-])c1ccc(O)cc1 +O=[N+]([O-])c1ccc(S(=O)(=O)Cl)cc1 +O=[N+]([O-])c1ccc2[nH]ccc2c1 +O=[N+]([O-])c1cccc(B(O)O)c1 +O=[N+]([O-])c1cccc(O)c1 +O=[N+]([O-])c1cccc(S(=O)(=O)Cl)c1 +O=[N+]([O-])c1ccccc1 +O=[N+]([O-])c1ccccc1F +O=[N+]([O-])c1ccccc1O +O=[N+]([O-])c1ccccc1S(=O)(=O)Cl +O=[N+]([O-])c1cccnc1Cl +O=[N+]([O-])c1cn[nH]c1 +O=[N+]([O-])c1cnc(Cl)nc1Cl +O=[N+]([O-])c1cnccc1Cl +O=[O+][O-] +O=[Os](=O)(=O)=O +O=[PH2]O +O=[Pt] +O=[Pt]=O +O=[Ru](=O)(=O)[O-] +O=[Se]=O +O=[Si](O)O +O=c1[nH]c2ccccc2c(=O)o1 +O=c1n(Cl)c(=O)n(Cl)c(=O)n1Cl +OB(O)C1CC1 +OB(O)O +OB(O)c1cc(F)cc(F)c1 +OB(O)c1cc2ccccc2o1 +OB(O)c1ccc(-c2ccccc2)cc1 +OB(O)c1ccc(Br)cc1 +OB(O)c1ccc(C(F)(F)F)cc1 +OB(O)c1ccc(Cl)cc1 +OB(O)c1ccc(Cl)cc1Cl +OB(O)c1ccc(F)c(F)c1 +OB(O)c1ccc(F)cc1 +OB(O)c1ccc(F)cc1F +OB(O)c1ccc(F)nc1 +OB(O)c1ccc(O)cc1 +OB(O)c1ccc(OC(F)(F)F)cc1 +OB(O)c1ccc(Oc2ccccc2)cc1 +OB(O)c1ccc2c(c1)c1ccccc1n2-c1ccccc1 +OB(O)c1ccc2ccccc2c1 +OB(O)c1cccc(Br)c1 +OB(O)c1cccc(C(F)(F)F)c1 +OB(O)c1cccc(Cl)c1 +OB(O)c1cccc(F)c1 +OB(O)c1cccc(O)c1 +OB(O)c1cccc2ccccc12 +OB(O)c1ccccc1 +OB(O)c1ccccc1C(F)(F)F +OB(O)c1ccccc1Cl +OB(O)c1ccccc1F +OB(O)c1cccnc1 +OB(O)c1cccnc1F +OB(O)c1ccco1 +OB(O)c1cccs1 +OB(O)c1ccncc1 +OB(O)c1ccoc1 +OB(O)c1ccsc1 +OB(O)c1cncnc1 +OC(C(F)(F)F)C(F)(F)F +OC1CCC1 +OC1CCCC1 +OC1CCCCC1 +OC1CCNC1 +OC1CCNCC1 +OC1CCOCC1 +OC1CNC1 +OCC(CO)(CO)CO +OCC(F)(F)F +OCC(O)CO +OCC1CC1 +OCC1CCCCC1 +OCC1CCCO1 +OCC1CCNCC1 +OCC1CO1 +OCCBr +OCCCBr +OCCCCO +OCCCCl +OCCCO +OCCCl +OCCN(CCO)CCO +OCCN1CCNCC1 +OCCN1CCOCC1 +OCCNCCO +OCCO +OCCOCCO +OCCOCCOCCO +OCCS +OCCc1ccccc1 +OC[C@@H]1CCCN1 +OC[C@H]1OC(O)[C@@H](O)[C@@H](O)[C@@H]1O +OC[C@H]1O[C@H](O[C@]2(CO)O[C@H](CO)[C@@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@@H]1O +OCc1ccc(B(O)O)cc1 +OCc1ccccc1 +OCc1ccccn1 +OCc1cccnc1 +O[C@@H]1CCNC1 
+O[C@H](CS)[C@H](O)CS +O[C@H]1CCNC1 +O[C@H]1CN2CCC1CC2 +O[K] +O[Na] +Oc1c(F)c(F)c(F)c(F)c1F +Oc1ccc(-c2ccccc2)cc1 +Oc1ccc(Br)cc1 +Oc1ccc(C(F)(F)F)cc1 +Oc1ccc(Cl)cc1 +Oc1ccc(F)cc1 +Oc1ccc(F)cc1F +Oc1ccc(I)cc1 +Oc1ccc(O)cc1 +Oc1ccc(OCc2ccccc2)cc1 +Oc1ccc(Oc2ccccc2)cc1 +Oc1ccc2c(c1)OCO2 +Oc1ccc2cc(Br)ccc2c1 +Oc1ccc2ccccc2c1 +Oc1cccc(Br)c1 +Oc1cccc(C(F)(F)F)c1 +Oc1cccc(Cl)c1 +Oc1cccc(F)c1 +Oc1cccc(O)c1 +Oc1cccc2[nH]nnc12 +Oc1cccc2ccccc12 +Oc1cccc2cccnc12 +Oc1ccccc1 +Oc1ccccc1Br +Oc1ccccc1Cl +Oc1ccccc1F +Oc1ccccc1O +Oc1ccccn1 +Oc1cccnc1 +On1nnc2ccccc21 +On1nnc2cccnc21 +S=C(Cl)Cl +S=C(n1ccnc1)n1ccnc1 +S=C=Nc1ccccc1 +S=C=S +S=P12SP3(=S)SP(=S)(S1)SP(=S)(S2)S3 +SCCCS +SCc1ccccc1 +Sc1ccc(Br)cc1 +Sc1ccc(Cl)cc1 +Sc1ccccc1 +Sc1nc2ccccc2[nH]1 +[2H]C(Cl)(Cl)Cl +[2H]O[2H] +[AlH4-] +[BH3-]C#N +[C-]#N +[C-]#[N+]CC(=O)OCC +[C-]#[N+]CS(=O)(=O)c1ccc(C)cc1 +[CH2-]CCC +[CH]1[CH][CH][C](P(c2ccccc2)c2ccccc2)[CH]1 +[CH]1[CH][CH][C]([PH+](c2ccccc2)c2ccccc2)[CH]1 +[C]=O +[Cu]Br +[Cu]I +[Li]C +[Li]C(C)(C)C +[Li]C(C)CC +[Li]CC +[Li]CCC +[Li]CCCC +[Li]Cl +[Li]N([Si](C)(C)C)[Si](C)(C)C +[Li]O +[Li]c1ccccc1 +[Mg+]c1ccccc1 +[N-]=C=O +[N-]=[N+]=NP(=O)(Oc1ccccc1)Oc1ccccc1 +[N-]=[N+]=NP(=O)(c1ccccc1)c1ccccc1 +[N-]=[N+]=[N-] +[NH3+]O +[Na]OCl +[O-]Cl +[O-][Cl+3]([O-])([O-])O +[O-][Cl+][O-] +[O-][I+3]([O-])([O-])O +[O-][I+3]([O-])([O-])[O-] +[SiH3]c1ccccc1 +c1c[nH]cn1 +c1c[nH]nn1 +c1cc[nH+]cc1 +c1cc[nH]c1 +c1ccc(-c2c(-c3ccccc3)c(-c3ccccc3)[c-](-c3ccccc3)c2-c2ccccc2)cc1 +c1ccc(-c2ccccc2)cc1 +c1ccc(-c2ccccc2P(C2CCCCC2)C2CCCCC2)cc1 +c1ccc(-c2ccccn2)nc1 +c1ccc(C(c2ccccc2)N2CCNCC2)cc1 +c1ccc(C2CCNCC2)cc1 +c1ccc(CC2CCNCC2)cc1 +c1ccc(CN2CCNCC2)cc1 +c1ccc(CNCc2ccccc2)cc1 +c1ccc(N2CCNCC2)cc1 +c1ccc(N2CCNCC2)nc1 +c1ccc(Nc2ccccc2)cc1 +c1ccc(OP(Oc2ccccc2)Oc2ccccc2)cc1 +c1ccc(Oc2ccccc2)cc1 +c1ccc(P(CCCP(c2ccccc2)c2ccccc2)c2ccccc2)cc1 +c1ccc(P(c2ccccc2)[c-]2cccc2)cc1 +c1ccc(P(c2ccccc2)c2ccc3ccccc3c2-c2c(P(c3ccccc3)c3ccccc3)ccc3ccccc23)cc1 +c1ccc(P(c2ccccc2)c2ccccc2)cc1 
class SmilesAvailability(ABC):
    """
    Abstract base for objects that tell whether SMILES strings are available.

    Users call is_available(), first_match() or find_matches() (or the
    instance itself, which behaves like is_available()). Derived classes only
    need to implement the protected _find_matches().
    """

    def __init__(self, standardizer: Optional[Callable[[str], str]] = None):
        """
        Args:
            standardizer: callable applied to each SMILES string before the
                availability check (typically: canonicalization). When None,
                SMILES strings are looked up unchanged.
        """
        self.standardizer = standardizer

    def __call__(self, smiles: str) -> bool:
        """
        Make the instance usable as a predicate; same as is_available().
        """
        return self.is_available(smiles)

    def is_available(self, smiles: str) -> bool:
        """Whether at least one match exists for the given SMILES string."""

        # Returning from inside the loop means that at most one match is
        # pulled from the generator; remaining sources are never queried.
        for _ in self.find_matches(smiles):
            return True
        return False

    def first_match(self, smiles: str) -> Optional[AvailabilityMatch]:
        """Return the first match for the given SMILES string, or None if
        there is no match at all."""

        # Only the first element is requested, so the search stops there.
        matches = self.find_matches(smiles)
        return next(matches, None)

    def find_matches(self, smiles: str) -> Iterator[AvailabilityMatch]:
        """
        Lazily iterate over the matches for a SMILES string.

        Being a generator, it lets callers stop as soon as they have seen
        enough matches. If the standardizer raises, a warning is logged and
        no match is produced.

        Args:
            smiles: SMILES string to get the availability for.

        Returns:
            Iterator/Generator over matches for the given SMILES string.
        """

        standardize = self.standardizer
        if standardize is not None:
            try:
                smiles = standardize(smiles)
            except Exception as e:
                # On failure "smiles" still holds the non-standardized input.
                logger.warning(f'Error when standardizing SMILES "{smiles}": {e}')
                return

        yield from self._find_matches(smiles)

    @abstractmethod
    def _find_matches(self, smiles: str) -> Iterator[AvailabilityMatch]:
        """
        Produce the matches for a SMILES string; to be implemented by
        derived classes.

        The public find_matches() calls this function with a SMILES string
        that has already been standardized.

        Args:
            smiles: SMILES string to get the sources for (already standardized).
        """
+ + Args: + smiles: SMILES string to get the sources for (already standardized). + """ diff --git a/src/rxn/availability/utils.py b/src/rxn/availability/utils.py new file mode 100644 index 0000000..e50958a --- /dev/null +++ b/src/rxn/availability/utils.py @@ -0,0 +1,20 @@ +from typing import Callable + + +def wrap_standardizer_with_tilde_substitution( + smiles_standardizer: Callable[[str], str] +) -> Callable[[str], str]: + """ + Wrap a SMILES standardizer to make it replace tildes with dots. + + Since IsAvailable is still being used with molecules containing "~" as a + fragment bond, it is necessary to remain compatible with it. This function + ensures this compatibility by replacing tildes with dots as a first step + for the SMILES standardization. + """ + + def wrapped_standardizer(smiles: str) -> str: + smiles = smiles.replace("~", ".") + return smiles_standardizer(smiles) + + return wrapped_standardizer diff --git a/tests/example_compounds.txt b/tests/example_compounds.txt new file mode 100644 index 0000000..89473e8 --- /dev/null +++ b/tests/example_compounds.txt @@ -0,0 +1,3 @@ +# Some example compounds for tests +CC(Cc1ccc(cc1)C(C(=O)O)C)C +CC(=O)Nc1ccc(cc1)O diff --git a/tests/test_availability_combiner.py b/tests/test_availability_combiner.py new file mode 100644 index 0000000..39c5c9d --- /dev/null +++ b/tests/test_availability_combiner.py @@ -0,0 +1,38 @@ +from typing import Callable, List + +from rxn.availability.availability_combiner import AvailabilityCombiner +from rxn.availability.availability_from_smarts import AvailabilityFromSmarts +from rxn.availability.availability_from_smiles import AvailabilityFromSmiles + + +def test_availability_combiner(): + # exclusion criteria (note that simple callables are allowed) + excluded: List[Callable[[str], bool]] = [ + AvailabilityFromSmiles(["CO", "COC"]), + AvailabilityFromSmarts(["[Na+]"]), + lambda x: x == "C", + ] + + exact_smiles_availability = AvailabilityFromSmiles(["CCCC", "C", "CO", "CCO"]) + 
smarts_availability = AvailabilityFromSmarts(["[O;H1]"]) # simple hydroxy oxygen + + combined = AvailabilityCombiner( + sources=[exact_smiles_availability, smarts_availability], + excluded_sources=excluded, + ) + + # simple positive checks + assert combined.is_available("CCCCCCO") + assert combined.is_available("CCCC") + assert not combined.is_available("CCC") + + # exclusion takes precedence over availability + assert not combined.is_available("CO") + assert not combined.is_available("C") + + # getting the information on which class the availability comes from + combined.add_source_to_match_info_key = "dummy_key" + matches = list(combined.find_matches("CCO")) + assert len(matches) == 2 + assert matches[0].info["dummy_key"] is exact_smiles_availability + assert matches[1].info["dummy_key"] is smarts_availability diff --git a/tests/test_availability_from_regex.py b/tests/test_availability_from_regex.py new file mode 100644 index 0000000..fda76b6 --- /dev/null +++ b/tests/test_availability_from_regex.py @@ -0,0 +1,27 @@ +import re + +from rxn.chemutils.conversion import canonicalize_smiles + +from rxn.availability.availability_from_regex import AvailabilityFromRegex + + +def test_availability_from_regex(): + regexes = [ + re.compile(re.escape("[Na+]")), # sodium ion + re.compile(re.escape("CCc2cc")), # random piece of SMILES + ] + + availability_from_regex = AvailabilityFromRegex(regexes=regexes) + + # Basic checks + assert availability_from_regex("CCc2ccccc2") + assert availability_from_regex("[Na+].[Cl-]") + assert not availability_from_regex("[Na]CCC") + + # possible to have several matches + assert len(list(availability_from_regex.find_matches("[Na+].CCCCc2ccccc2CC"))) == 2 + + # If a standardizer is given: will standardize before trying to match. + # In the case below, no match anymore because the number will be 1 after canonicalization. 
+ availability_from_regex.standardizer = canonicalize_smiles + assert not availability_from_regex("CCc2ccccc2") diff --git a/tests/test_availability_from_smarts.py b/tests/test_availability_from_smarts.py new file mode 100644 index 0000000..a83642d --- /dev/null +++ b/tests/test_availability_from_smarts.py @@ -0,0 +1,41 @@ +from rxn.availability.availability_from_smarts import AvailabilityFromSmarts + + +def test_is_available_from_smarts(): + # 2-connected (etheric) oxygen next to a carbon atom, or a halogen atom + smarts = ["[O;D2]C", "[F,Cl,Br,I]"] + availability_from_smarts = AvailabilityFromSmarts(smarts=smarts) + + # Matching one of the two rules + assert availability_from_smarts("COC") + assert availability_from_smarts("OCCBr") + assert availability_from_smarts("F") + assert availability_from_smarts("CF") + assert availability_from_smarts("C1COCC1") + + # Not matching + assert not availability_from_smarts("CCO") + assert not availability_from_smarts("NON") + assert not availability_from_smarts("OCCS") + + # invalid SMILES are not available + assert not availability_from_smarts("invalid") + + +def test_availability_matches_from_smarts(): + # 2-connected (etheric) oxygen next to a carbon atom, or a halogen atom + smarts = ["[O;D2]C", "[F,Cl,Br,I]"] + availability_from_smarts = AvailabilityFromSmarts(smarts=smarts) + + # The class allows multiple matches for the different SMARTS + assert len(list(availability_from_smarts.find_matches("COC"))) == 1 + assert len(list(availability_from_smarts.find_matches("COCCCBr"))) == 2 + assert len(list(availability_from_smarts.find_matches("CCO"))) == 0 + + # look into the details strings + matches = list(availability_from_smarts.find_matches("COCCCBr")) + details_strings = [match.details for match in matches] + assert details_strings == [ + 'Matching SMARTS "[O;D2]C".', + 'Matching SMARTS "[F,Cl,Br,I]".', + ] diff --git a/tests/test_availability_from_smiles.py b/tests/test_availability_from_smiles.py new file mode 100644 
index 0000000..b2d2d10 --- /dev/null +++ b/tests/test_availability_from_smiles.py @@ -0,0 +1,23 @@ +from rxn.chemutils.conversion import canonicalize_smiles + +from rxn.availability.availability_from_smiles import AvailabilityFromSmiles + + +def test_availability_from_smiles(): + available = ["CCO", "CCCC"] + + availability_from_smiles = AvailabilityFromSmiles( + compounds=available, standardizer=canonicalize_smiles + ) + + assert availability_from_smiles("CCO") + assert availability_from_smiles("CCCC") + assert not availability_from_smiles("C") + + # invalid SMILES are not available + assert not availability_from_smiles("invalid") + + # non-canonical form is found, except if we remove the standardizer + assert availability_from_smiles("OCC") + availability_from_smiles.standardizer = None + assert not availability_from_smiles("OCC") diff --git a/tests/test_is_available.py b/tests/test_is_available.py new file mode 100644 index 0000000..20c3bdf --- /dev/null +++ b/tests/test_is_available.py @@ -0,0 +1,16 @@ +from pathlib import Path + +from rxn.availability import IsAvailable + +compounds_filepath = Path(__file__).parent / "example_compounds.txt" + + +def test_is_available_object(): + is_available_object = IsAvailable() + assert is_available_object("B1C2CCCC1CCC2") + assert not is_available_object("CC(Cc1ccc(cc1)C(C(=O)O)C)C") + + is_available_object = IsAvailable(additional_compounds_filepath=compounds_filepath) + assert is_available_object("B1C2CCCC1CCC2") + assert is_available_object("CC(Cc1ccc(cc1)C(C(=O)O)C)C") + assert not is_available_object("C1=CC=C2C(=C1)C=CC=NN2") diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..08b2323 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,21 @@ +from rxn.chemutils.conversion import canonicalize_smiles + +from rxn.availability.utils import wrap_standardizer_with_tilde_substitution + + +def test_wrap_standardizer_with_tilde_substitution(): + standardizer = canonicalize_smiles + 
wrapped_standardizer = wrap_standardizer_with_tilde_substitution(standardizer) + + # If no tilde in the SMILES: both give the same result. + for smiles in ["C(C)O", "C(C).OC", "O.C.N"]: + assert wrapped_standardizer(smiles) == standardizer(smiles) + + # When a tilde is present, only the wrapped standardizer converts it to a dot. + assert standardizer("C~O") == "C~O" + assert wrapped_standardizer("C~O") == "C.O" + + # In cases where the tilde leads to a failed canonicalization, only the + # wrapped standardizer works. + assert wrapped_standardizer("[Na+]~[H-]") == "[H-].[Na+]" + assert standardizer("[Na+]~[H-]") == "[NaH+]"