Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SOAP forcefields #19

Merged
merged 16 commits into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion envs/environment-cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ name: jitterbug
channels:
- defaults
- conda-forge
- pytorch
- conda-forge/label/libint_dev
dependencies:
- python==3.9.*
Expand All @@ -31,4 +30,5 @@ dependencies:
- pip
- pip:
- git+https://gitlab.com/ase/ase.git # Needed for MOPAC
- gpytorch==1.11.*
- -e ..[test]
95 changes: 95 additions & 0 deletions jitterbug/model/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
"""Base class defining the key methods"""
from tempfile import TemporaryDirectory
from typing import List, Optional
from random import choices
from pathlib import Path
import os

import numpy as np
from scipy import stats
from ase import Atoms
from ase.vibrations import Vibrations
from ase.calculators.calculator import Calculator


class EnergyModel:
Expand Down Expand Up @@ -35,3 +44,89 @@ def sample_hessians(self, model: object, num_samples: int) -> list[np.ndarray]:
A list of 2D hessians
"""
raise NotImplementedError()


class ASEEnergyModel(EnergyModel):
"""Energy models which produce a series of ASE :class:`~ase.calculators.calculator.Calculator` objects"""

def __init__(self, reference: Atoms, num_calculators: int, scratch_dir: Optional[Path] = None):
self.reference = reference
self.scratch_dir = scratch_dir
self.num_calculators = num_calculators

def train_calculator(self, data: List[Atoms]) -> Calculator:
"""Train one of the constituent calculators

Args:
data: Training data
Returns:
Retrained calculator
"""
raise NotImplementedError()

def train(self, data: list[Atoms]) -> List[Calculator]:
# Train each on a different bootstrap sample
output = []
for _ in range(self.num_calculators):
sampled_data = choices(data, k=len(data))
output.append(self.train_calculator(sampled_data))
return output

def compute_hessian(self, mol: Atoms, calc: Calculator) -> np.ndarray:
"""Compute the Hessian using one of the calculators

Args:
mol: Molecule to be evaluated
calc: Calculator to use
Returns:
2D Hessian matrix
"""
# Clone the molecule to avoid any cross-talk
mol = mol.copy()

with TemporaryDirectory(dir=self.scratch_dir) as td:
start_dir = Path.cwd()
try:
# Run the vibrations calculation in a temporary directory
os.chdir(td)
mol.calc = calc
vib = Vibrations(mol, name='mbtr-temp')
vib.run()

# Return only the 2D Hessian
return vib.get_vibrations().get_hessian_2d()
finally:
os.chdir(start_dir)

def mean_hessian(self, models: list[Calculator]) -> np.ndarray:
# Run all calculators
hessians = [self.compute_hessian(self.reference, calc) for calc in models]

# Return the mean
return np.mean(hessians, axis=0)

def sample_hessians(self, models: list[Calculator], num_samples: int) -> list[np.ndarray]:
# Run all calculators
hessians = [self.compute_hessian(self.reference, calc) for calc in models]

# Compute the mean and covariance for each parameter
triu_ind = np.triu_indices(hessians[0].shape[0])
hessians_flat = [h[triu_ind] for h in hessians]
hessian_mean = np.mean(hessians_flat, axis=0)
hessian_covar = np.cov(hessians_flat, rowvar=False)

# Generate samples
hessian_mvn = stats.multivariate_normal(hessian_mean, hessian_covar, allow_singular=True)
diag_indices = np.diag_indices(hessians[0].shape[0])
output = []
for sample in hessian_mvn.rvs(size=num_samples):
# Fill in a 2D version
sample_hessian = np.zeros_like(hessians[0])
sample_hessian[triu_ind] = sample

# Make it symmetric
sample_hessian += sample_hessian.T
sample_hessian[diag_indices] /= 2

output.append(sample_hessian)
return output
1 change: 1 addition & 0 deletions jitterbug/model/dscribe/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Energy models using `DScribe <https://singroup.github.io/dscribe/latest/index.html>`_"""
87 changes: 87 additions & 0 deletions jitterbug/model/dscribe/globald.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""Models which use a single vector to describe a molecule.

Such approaches, such as MBTR, present a simple "molecule->energy" learning problem.
Other methods, such as SOAP, provided atomic-level features that must require an
extra step "molecule->atoms->energy/atom->energy".

We train the models using sklearn for this example.
"""
from typing import List

from dscribe.descriptors.descriptorglobal import DescriptorGlobal
from sklearn.linear_model import LinearRegression
from sklearn.base import BaseEstimator, clone
from dscribe.descriptors import MBTR
from ase.calculators.calculator import Calculator, all_changes
from ase import Atoms
import numpy as np

from jitterbug.model.base import ASEEnergyModel


class DScribeGlobalCalculator(Calculator):
"""A learnable forcefield based on global fingerprints computed using DScribe"""

implemented_properties = ['energy', 'forces']
default_parameters = {
'descriptor': MBTR(
species=["H", "C", "N", "O"],
geometry={"function": "inverse_distance"},
grid={"min": 0, "max": 1, "n": 100, "sigma": 0.1},
weighting={"function": "exp", "scale": 0.5, "threshold": 1e-3},
periodic=False,
normalization="l2",
),
'model': LinearRegression(),
'offset': 0., # Normalizing parameters
'scale': 1.
}

def calculate(self, atoms=None, properties=('energy', 'forces'), system_changes=all_changes):
# Compute the energy using the learned model
desc = self.parameters['descriptor'].create_single(atoms)
energy_unscaled = self.parameters['model'].predict(desc[None, :])
self.results['energy'] = energy_unscaled[0] * self.parameters['scale'] + self.parameters['offset']

# If desired, compute forces numerically
if 'forces' in properties:
# calculate_numerical_forces use that the calculation of the input atoms,
# even though it is a method of a calculator and not of the input atoms :shrug:
temp_atoms: Atoms = atoms.copy()
temp_atoms.calc = self
self.results['forces'] = self.calculate_numerical_forces(temp_atoms)


class DScribeGlobalEnergyModel(ASEEnergyModel):
"""Compute energy using a scikit-learn model that computes energies from global descriptors

Args:
reference: Reference structure at which we compute the Hessian
descriptors: Tool used to compute descriptors
model: Scikit-learn model to use to compute energies given descriptors
num_calculators: Number of models to use in ensemble
"""

def __init__(self, reference: Atoms, descriptors: DescriptorGlobal, model: BaseEstimator, num_calculators: int):
super().__init__(reference, num_calculators)
self.desc = descriptors
self.model = model

def train_calculator(self, data: List[Atoms]) -> Calculator:
# Make a copy of the model
model: BaseEstimator = clone(self.model)

# Compute the descriptors then train
train_x = self.desc.create(data)
train_y = np.array([a.get_potential_energy() for a in data])
scale_y, offset_y = np.std(train_y), np.mean(train_y)
train_y = (train_y - offset_y) / scale_y
model.fit(train_x, train_y)

# Assemble into a Calculator
return DScribeGlobalCalculator(
descriptor=self.desc,
model=model,
offset=offset_y,
scale=scale_y,
)
Loading
Loading