Skip to content

Commit

Permalink
Initial draft of a data model (#16)
Browse files Browse the repository at this point in the history
* Initial draft of a data model

* Update example of what a node could be called

* Add an example MOF

* Ability to read a CIF and parse it

* Update interfaces to use MOFRecord
  • Loading branch information
WardLT authored Aug 16, 2023
1 parent 7b55d85 commit 3d1b620
Show file tree
Hide file tree
Showing 8 changed files with 1,987 additions and 8 deletions.
4 changes: 3 additions & 1 deletion mofa/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@

import ase

from mofa.model import MOFRecord


def train_generator(
starting_model: str | Path,
examples: list[object],
examples: list[MOFRecord],
num_epochs: int
) -> Path:
"""Retrain a generative model for MOFs
Expand Down
96 changes: 96 additions & 0 deletions mofa/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""Data models for a MOF class"""
from dataclasses import dataclass, field
from functools import cached_property
from pathlib import Path
from io import StringIO

from ase.io.cif import read_cif

import ase


@dataclass
class NodeDescription:
"""The inorganic components of a MOF"""

name: str = ...
"""Human-readable name of the node (e.g., "Cu paddlewheel")"""
xyz: str | None = None
"""XYZ coordinates of each atom in the node
Uses At or Fr as an identifier of the the anchor points
where the linkers attach to the node
- At designates a carbon-carbon bond anchor
- Fr designates other types of linkages
"""


@dataclass
class LigandDescription:
"""Description of organic sections which connect inorganic nodes"""

name: str | None = ...
"""Human-readable name of the linker"""
smiles: str = ...
"""SMILES-format designation of the molecule"""
xyz: str | None = None
"""XYZ coordinates of each atom in the linker"""

fragment_atoms: list[list[int]] | None = None
"""Groups of atoms which attach to the nodes
There are typically two groups of fragment atoms, and these are
never altered during MOF generation."""

@property
def linker_atoms(self) -> list[int]:
"""All atoms which are not part of a fragment"""
raise NotImplementedError()


@dataclass
class MOFRecord:
    """Information available about a certain MOF"""
    # Data describing what the MOF is
    identifiers: dict[str, str] = field(default_factory=dict)
    """Names of this MOF in registries (e.g., hMOF)"""
    topology: str | None = None
    """Description of the 3D network structure (e.g., pcu) as the topology"""
    catenation: int | None = None
    """Degree of catenation. 0 corresponds to no interpenetrating lattices"""
    # ``tuple[X, ...]`` marks a tuple of any length; ``tuple[X]`` would mean exactly one entry
    nodes: tuple[NodeDescription, ...] = field(default_factory=tuple)
    """Description of the nodes within the structure"""
    ligands: tuple[LigandDescription, ...] = field(default_factory=tuple)
    """Description of each linker within the structure"""

    # Information about the 3D structure of the MOF
    structure: str = ...
    """A representative 3D structure of the MOF in CIF format

    This is the text which :meth:`from_file` reads from disk
    and which :attr:`atoms` parses with ASE's CIF reader."""

    # Properties
    gas_storage: dict[tuple[str, float], float] = field(default_factory=dict)
    """Storage capacity of the MOF for different gases and pressures"""
    structure_stability: dict[str, float] = field(default_factory=dict)
    """How likely the structure is to be stable according to different assays

    A score of 1 equates to most likely to be stable, 0 as least likely."""

    @classmethod
    def from_file(cls, cif_path: Path | str, **kwargs) -> 'MOFRecord':
        """Create a MOF description from a CIF file on disk

        Keyword arguments can include identifiers of the MOF and
        are passed to the constructor.

        Args:
            cif_path: Path to the CIF file
        Returns:
            A MOF record before fragmentation
        """

        # Build through ``cls`` so that subclasses receive an instance of their own type
        return cls(structure=Path(cif_path).read_text(), **kwargs)

    @cached_property
    def atoms(self) -> ase.Atoms:
        """The structure as an ASE Atoms object

        Only the first structure found in the CIF text is used.
        The result is cached after the first access.
        """
        images = read_cif(StringIO(self.structure), index=slice(None))
        # Depending on the ASE version, a slice index yields either a generator
        #  or a list; iter() makes next() valid for both
        return next(iter(images))
35 changes: 31 additions & 4 deletions mofa/scoring/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,43 @@

import ase

from mofa.model import MOFRecord

class LinkerScorer:
"""Scoring functions which operate on the linkers produced by the generator

class Scorer:
    """Base class for tools which score a MOF

    Most implementations should be subclasses of the :class:`LigandScorer` or
    :class:`MOFScorer`, which provide utility that - for example - extract
    the information about the linker.
    """

    def score_mof(self, record: MOFRecord) -> float:
        """Score a MOF given the full MOF record

        Args:
            record: Record to be scored
        Returns:
            Score value
        Raises:
            NotImplementedError: Always, as subclasses must provide the implementation
        """
        # Fail loudly instead of silently returning None from a method annotated as float
        raise NotImplementedError()


class LigandScorer(Scorer):
    """Scoring functions which operate on the ligands between nodes in the MOF

    Examples:
        - Verify the ligand is chemically reasonable (e.g., SAScore, SCScore)
    """

    def __call__(self, linker: ase.Atoms) -> float:
        """Score a single ligand

        Args:
            linker: Structure of the ligand
        Returns:
            Score value
        Raises:
            NotImplementedError: Always, as subclasses must provide the implementation
        """
        raise NotImplementedError()

    def score_mof(self, record: MOFRecord) -> float:
        """Score a MOF based on its ligand

        Args:
            record: Record to be scored, which must describe exactly one ligand
        Returns:
            Score value
        Raises:
            NotImplementedError: Always. Neither the single-ligand nor the
                multi-ligand case is implemented yet.
        """
        # Raise explicitly instead of using `assert`, which is stripped when Python runs with -O
        if len(record.ligands) != 1:
            # TODO: Support MOFs with more than one type of linker
            raise NotImplementedError('We do not yet know how to score a MOF with >1 type of linker')
        raise NotImplementedError()

class MOFScorer:

class MOFScorer(Scorer):
"""Scoring functions which produce a quick estimate of the quality of a MOF
Examples:
Expand All @@ -24,3 +48,6 @@ class MOFScorer:

def __call__(self, mof: ase.Atoms) -> float:
    """Compute the score of a MOF structure

    Args:
        mof: Structure of the MOF
    Returns:
        Score value
    Raises:
        NotImplementedError: Always in this class
    """
    raise NotImplementedError

def score_mof(self, record: MOFRecord) -> float:
    """Score a MOF record using its 3D structure

    Args:
        record: Record to be scored
    Returns:
        Result of calling this scorer on ``record.atoms``
    """
    structure = record.atoms
    return self(structure)
4 changes: 2 additions & 2 deletions mofa/scoring/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
import numpy as np
import ase

from mofa.scoring.base import LinkerScorer
from mofa.scoring.base import MOFScorer


class MinimumDistance(LinkerScorer):
class MinimumDistance(MOFScorer):
"""Rate molecules based on the closest distance between atoms"""

def __call__(self, linker: ase.Atoms) -> float:
Expand Down
25 changes: 25 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

from pytest import fixture
from pathlib import Path
from ase.io.cif import read_cif
import ase

from mofa.model import MOFRecord

# Directory named "files" located next to this module
_files_path = Path(__file__).parent.joinpath('files')


@fixture()
def example_cif() -> Path:
    """Path to the example CIF file, ``check.cif``, in the test files directory"""
    cif_name = 'check.cif'
    return _files_path / cif_name


@fixture()
def example_mof(example_cif) -> ase.Atoms:
    """First structure in the example CIF file as an ASE Atoms object

    Args:
        example_cif: Path to the example CIF file
    Returns:
        The parsed structure
    """
    with open(example_cif) as fp:
        # Depending on the ASE version, a slice index yields either a generator
        #  or a list; iter() makes next() valid for both
        return next(iter(read_cif(fp, index=slice(None))))


@fixture()
def example_record(example_cif) -> MOFRecord:
    """MOF record created from the example CIF file

    Args:
        example_cif: Path to the example CIF file
    Returns:
        Record produced by :meth:`MOFRecord.from_file`
    """
    record = MOFRecord.from_file(example_cif)
    return record
Loading

0 comments on commit 3d1b620

Please sign in to comment.