From a4686b3c077bfebc1b9cf124c138e18de573872d Mon Sep 17 00:00:00 2001 From: Ben Rhodes Date: Thu, 19 Dec 2024 13:10:29 +0000 Subject: [PATCH] Wrap by default (#41) * Wrap by default * Improve defaults * Fix cell shape * Add tests --------- Co-authored-by: ben rhodes --- orb_models/forcefield/atomic_system.py | 45 +++++---- .../forcefield/featurization_utilities.py | 24 +++++ tests/fixtures/AFI.cif | 98 +++++++++++++++++++ tests/test_atomic_system.py | 48 +++++++++ 4 files changed, 197 insertions(+), 18 deletions(-) create mode 100644 tests/fixtures/AFI.cif create mode 100644 tests/test_atomic_system.py diff --git a/orb_models/forcefield/atomic_system.py b/orb_models/forcefield/atomic_system.py index c3be191..85debf6 100644 --- a/orb_models/forcefield/atomic_system.py +++ b/orb_models/forcefield/atomic_system.py @@ -90,46 +90,57 @@ def atom_graphs_to_ase_atoms( def ase_atoms_to_atom_graphs( atoms: ase.Atoms, - system_config: SystemConfig = SystemConfig( - radius=10.0, max_num_neighbors=20, use_timestep_0=True - ), - system_id: Optional[int] = None, + *, + wrap: bool = True, brute_force_knn: Optional[bool] = None, - device: Optional[torch.device] = torch.device( - "cuda" if torch.cuda.is_available() else "cpu" - ), + device: Optional[torch.device] = None, + system_config: Optional[SystemConfig] = None, + system_id: Optional[int] = None, ) -> AtomGraphs: """Generate AtomGraphs from an ase.Atoms object. Args: atoms: ase.Atoms object - system_config: SystemConfig object - system_id: Optional system_id + wrap: whether to wrap atomic positions into the central unit cell (if there is one). + NOTE: there can be numerical differences from ase's .wrap() method when an atom is near a cell boundary. brute_force_knn: whether to use a 'brute force' knn approach with torch.cdist for kdtree construction. Defaults to None, in which case brute_force is used if we a GPU is avaiable (2-6x faster), but not on CPU (1.5x faster - 4x slower). For very large systems, brute_force may OOM on GPU, so it is recommended to set to False in that case. - device: device to put the tensors on. + device: device to put the tensors on. By default, uses the GPU if available. + system_config: SystemConfig object, specifying the max radius and max num_neighbors + used in the k-nearest neighbors graph construction. + system_id: Optional index, for tracking the identity of a datapoint. Returns: AtomGraphs object """ + if device is None: + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + if system_config is None: + system_config = SystemConfig(radius=10.0, max_num_neighbors=20) + atomic_numbers = torch.from_numpy(atoms.numbers).to(torch.long) atom_type_embedding = torch.nn.functional.one_hot( atomic_numbers, num_classes=118 ).type(torch.float32) + positions = torch.from_numpy(atoms.positions).to(torch.float32) + cell = torch.from_numpy(atoms.cell.array).to(torch.float32) + if wrap and torch.any(cell != 0): + positions = featurization_utilities.map_to_pbc_cell(positions, cell) + node_feats = { "atomic_numbers": atomic_numbers.to(torch.int64), "atomic_numbers_embedding": atom_type_embedding.to(torch.float32), # NOTE: positions are stored as features on the AtomGraphs, # but not actually used as input features to the model. - "positions": torch.from_numpy(atoms.positions).to(torch.float32), + "positions": positions, } - system_feats = {"cell": torch.Tensor(atoms.cell.array[None, ...]).to(torch.float)} + system_feats = {"cell": cell.unsqueeze(0)} edge_feats, senders, receivers = _get_edge_feats( - node_feats["positions"], # type: ignore - system_feats["cell"][0], + positions, + cell, system_config.radius, system_config.max_num_neighbors, brute_force=brute_force_knn, @@ -159,10 +170,8 @@ def _get_edge_feats( cell: torch.Tensor, radius: float, max_num_neighbours: int, - brute_force: Optional[bool] = None, - device: Optional[torch.device] = torch.device( - "cuda" if torch.cuda.is_available() else "cpu" - ), + brute_force: Optional[bool], + device: torch.device, ): """Get edge features. diff --git a/orb_models/forcefield/featurization_utilities.py b/orb_models/forcefield/featurization_utilities.py index fb6eb6a..eba609d 100644 --- a/orb_models/forcefield/featurization_utilities.py +++ b/orb_models/forcefield/featurization_utilities.py @@ -365,6 +365,30 @@ def compute_pbc_radius_graph( return torch.stack((senders_torch, receivers), dim=0), vectors +def map_to_pbc_cell( + positions: torch.Tensor, + periodic_boundary_conditions: torch.Tensor, +) -> torch.Tensor: + """Maps positions to within a periodic boundary cell. + + Args: + positions (torch.Tensor): The positions to be mapped. Shape [num_particles, 3] + periodic_boundary_conditions (torch.Tensor): The periodic boundary conditions. Shape 3x3. + + Returns: + torch.Tensor: Positions mapped to within a periodic boundary cell. + """ + # Inverses are a lot more reliable in double precision, so we'll do the whole + # thing in double then go back to single. + positions = positions.double() + periodic_boundary_conditions = periodic_boundary_conditions.double() + # The strategy here is to map our positions to fractional or internal coordinates. + # Then we take the modulo, then map back to euclidian co-ordinates. + fractional_pos = torch.linalg.solve(periodic_boundary_conditions.T, positions.T).T + fractional_pos = fractional_pos % 1.0 + return (fractional_pos @ periodic_boundary_conditions).float() + + def batch_map_to_pbc_cell( positions: torch.Tensor, periodic_boundary_conditions: torch.Tensor, diff --git a/tests/fixtures/AFI.cif b/tests/fixtures/AFI.cif new file mode 100644 index 0000000..412b4ac --- /dev/null +++ b/tests/fixtures/AFI.cif @@ -0,0 +1,98 @@ +# AFI zeolite +data_SiO2 +_symmetry_space_group_name_H-M 'P 1' +_cell_length_a 13.86655914 +_cell_length_b 13.86655914 +_cell_length_c 8.60047456 +_cell_angle_alpha 90.00000000 +_cell_angle_beta 90.00000000 +_cell_angle_gamma 120.00000000 +_symmetry_Int_Tables_number 1 +_chemical_formula_structural SiO2 +_chemical_formula_sum 'Si24 O48' +_cell_volume 1432.15645170 +_cell_formula_units_Z 24 +loop_ + _symmetry_equiv_pos_site_id + _symmetry_equiv_pos_as_xyz + 1 'x, y, z' +loop_ + _atom_site_type_symbol + _atom_site_label + _atom_site_symmetry_multiplicity + _atom_site_fract_x + _atom_site_fract_y + _atom_site_fract_z + _atom_site_occupancy + O O0 1 0.457007 0.334333 0.000000 1 + O O1 1 0.665666 0.122673 0.000000 1 + O O2 1 0.877327 0.542993 0.000000 1 + O O3 1 0.542993 0.665667 0.000000 1 + O O4 1 0.334334 0.877327 0.000000 1 + O O5 1 0.122674 0.457007 0.000000 1 + O O6 1 0.334333 0.457007 0.499999 1 + O O7 1 0.122674 0.665667 0.499999 1 + O O8 1 0.542993 0.877327 0.499999 1 + O O9 1 0.665666 0.542993 0.499999 1 + O O10 1 0.877326 0.334333 0.499999 1 + O O11 1 0.457006 0.122673 0.499999 1 + O O12 1 0.367814 0.367814 0.250000 1 + O O13 1 0.632186 0.000000 0.250000 1 + O O14 1 0.000000 0.632186 0.250000 1 + O O15 1 0.632186 0.632186 0.250000 1 + O O16 1 0.367813 0.000000 0.250000 1 + O O17 1 0.999999 0.367814 0.250000 1 + O O18 1 0.632186 0.632186 0.750000 1 + O O19 1 0.367813 0.000000 0.750000 1 + O O20 1 0.999999 0.367814 0.750000 1 + O O21 1 0.367814 0.367814 0.750000 1 + O O22 1 0.632186 0.000000 0.750000 1 + O O23 1 0.000000 0.632186 0.750000 1 + O O24 1 0.417358 0.208679 0.250000 1 + O O25 1 0.791321 0.208679 0.250000 1 + O O26 1 0.791321 0.582642 0.250000 1 + O O27 1 0.582642 0.791321 0.250000 1 + O O28 1 0.208678 0.791321 0.250000 1 + O O29 1 0.208679 0.417358 0.250000 1 + O O30 1 0.582642 0.791321 0.750000 1 + O O31 1 0.208678 0.791321 0.750000 1 + O O32 1 0.208679 0.417358 0.750000 1 + O O33 1 0.417358 0.208679 0.750000 1 + O O34 1 0.791321 0.208679 0.750000 1 + O O35 1 0.791321 0.582642 0.750000 1 + O O36 1 0.581212 0.418789 0.250000 1 + O O37 1 0.581212 0.162424 0.250000 1 + O O38 1 0.837576 0.418789 0.250000 1 + O O39 1 0.418788 0.581211 0.250000 1 + O O40 1 0.418788 0.837576 0.250000 1 + O O41 1 0.162423 0.581211 0.250000 1 + O O42 1 0.418788 0.581211 0.750000 1 + O O43 1 0.418788 0.837576 0.750000 1 + O O44 1 0.162423 0.581211 0.750000 1 + O O45 1 0.581212 0.418789 0.750000 1 + O O46 1 0.581212 0.162424 0.750000 1 + O O47 1 0.837576 0.418789 0.750000 1 + Si Si48 1 0.456260 0.332886 0.187394 1 + Si Si49 1 0.667114 0.123373 0.187394 1 + Si Si50 1 0.876626 0.543741 0.187394 1 + Si Si51 1 0.543740 0.667114 0.187394 1 + Si Si52 1 0.332886 0.876626 0.187394 1 + Si Si53 1 0.123375 0.456259 0.187394 1 + Si Si54 1 0.332885 0.456259 0.312606 1 + Si Si55 1 0.123373 0.667114 0.312606 1 + Si Si56 1 0.543740 0.876626 0.312606 1 + Si Si57 1 0.667115 0.543741 0.312606 1 + Si Si58 1 0.876626 0.332886 0.312606 1 + Si Si59 1 0.456260 0.123373 0.312606 1 + Si Si60 1 0.543740 0.667114 0.812605 1 + Si Si61 1 0.332886 0.876626 0.812605 1 + Si Si62 1 0.123375 0.456259 0.812605 1 + Si Si63 1 0.456260 0.332886 0.812605 1 + Si Si64 1 0.667114 0.123373 0.812605 1 + Si Si65 1 0.876626 0.543741 0.812605 1 + Si Si66 1 0.667115 0.543741 0.687394 1 + Si Si67 1 0.876626 0.332886 0.687394 1 + Si Si68 1 0.456260 0.123373 0.687394 1 + Si Si69 1 0.332885 0.456259 0.687394 1 + Si Si70 1 0.123373 0.667114 0.687394 1 + Si Si71 1 0.543740 0.876626 0.687394 1 \ No newline at end of file diff --git a/tests/test_atomic_system.py b/tests/test_atomic_system.py new file mode 100644 index 0000000..0e1407c --- /dev/null +++ b/tests/test_atomic_system.py @@ -0,0 +1,48 @@ +import ase.io +import numpy as np +import torch +from orb_models.forcefield.base import batch_graphs +from orb_models.forcefield.atomic_system import ( + atom_graphs_to_ase_atoms, + ase_atoms_to_atom_graphs, +) + + +def test_atoms_to_atom_graphs_invertibility(fixtures_path): + atoms = ase.Atoms(ase.io.read(fixtures_path / "AFI.cif")) + + atom_graphs = ase_atoms_to_atom_graphs(atoms, wrap=False) + recovered_atoms = atom_graphs_to_ase_atoms(atom_graphs)[0] + + assert np.allclose(recovered_atoms.positions, atoms.positions) + assert np.allclose(recovered_atoms.cell, atoms.cell) + assert (recovered_atoms.numbers == atoms.numbers).all() + + +def test_atom_graphs_to_ase_atoms_debatches(fixtures_path): + atoms = ase.Atoms(ase.io.read(fixtures_path / "AFI.cif")) + graphs = [ase_atoms_to_atom_graphs(atoms, wrap=False) for _ in range(4)] + batch = batch_graphs(graphs) + atoms_list = atom_graphs_to_ase_atoms(batch) + assert len(atoms_list) == 4 + assert (atoms_list[0].positions == atoms_list[1].positions).all() + assert (atoms_list[0].get_tags() == atoms_list[1].get_tags()).all() + + +def test_ase_atoms_to_atom_graphs_wraps(fixtures_path): + atoms_unwrapped = ase.Atoms(ase.io.read(fixtures_path / "AFI.cif")) + atoms_unwrapped.positions[:10] += 2.0 * atoms_unwrapped.cell.array.max() + atoms_wrapped = atoms_unwrapped.copy() + atoms_wrapped.wrap() + assert not np.allclose(atoms_wrapped.positions, atoms_unwrapped.positions) + + atom_graphs = ase_atoms_to_atom_graphs(atoms_unwrapped, wrap=False) + assert np.allclose(atom_graphs.positions.numpy(), atoms_unwrapped.positions) + + # Note: this test is slightly indirect. We can't test that wrap=True yields the same + # results as ase's .wrap(), because of slight numerical differences at the boundaries. + # Instead, we test that wrap=True for an unwrapped system yields the same results + # as wrap=True for an ase-wrapped system. + atom_graphs1 = ase_atoms_to_atom_graphs(atoms_unwrapped, wrap=True) + atom_graphs2 = ase_atoms_to_atom_graphs(atoms_wrapped, wrap=True) + assert torch.allclose(atom_graphs1.positions, atom_graphs2.positions)