Skip to content

Commit

Permalink
Merge branch 'main' into add_jacobian
Browse files Browse the repository at this point in the history
  • Loading branch information
WardLT committed Dec 4, 2023
2 parents 68a5589 + d5f72c1 commit 9e82cc6
Show file tree
Hide file tree
Showing 26 changed files with 3,051 additions and 701 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,15 @@ jobs:
uses: conda-incubator/setup-miniconda@v2
with:
environment-file: envs/environment-cpu.yml
mamba-version: "*"
activate-environment: test
auto-activate-base: true
auto-update-conda: false
remove-profiles: true
architecture: x64
clean-patched-environment-file: true
run-post: true
use-mamba: true
miniforge-version: latest
- name: Install test dependencies
run: |
pip install -e .[test]
Expand Down
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
repos:
- repo: https://github.com/kynan/nbstripout
rev: 0.6.1
hooks:
- id: nbstripout
5 changes: 2 additions & 3 deletions envs/environment-cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ name: jitterbug
channels:
- defaults
- conda-forge
- pytorch
- conda-forge/label/libint_dev
dependencies:
- python==3.9.*
Expand All @@ -26,10 +25,10 @@ dependencies:
- geometric

# Use Conda PyTorch to avoid OpenMP disagreement with other libraries
- pytorch==2.0.*
- cpuonly
- pytorch==2.0.*=*cpu*

- pip
- pip:
- git+https://gitlab.com/ase/ase.git # Needed for MOPAC
- gpytorch==1.11.*
- -e ..[test]
85 changes: 85 additions & 0 deletions jitterbug/compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""Tools for assessing the quality of a Hessian compared to a true one"""
from dataclasses import dataclass

import ase
from ase import units
import numpy as np
from ase.vibrations import VibrationsData
from pmutt.statmech import StatMech, presets


@dataclass
class HessianQuality:
"""Measurements of the quality of a Hessian"""

# Thermodynamics
zpe: float
"""Zero point energy (kcal/mol)"""
zpe_error: float
"""Different between the ZPE and the target one"""
cp: list[float]
"""Heat capacity as a function of temperature (units: kcal/mol/K)"""
cp_error: list[float]
"""Difference between known and approximate heat capacity as a function of temperature (units: kcal/mol/K)"""
h: list[float]
"""Enthalpy as a function of temperature (units: kcal/mol)"""
h_error: list[float]
"""Error between known and approximate enthalpy as a function of temperature (units: kcal/mol)"""
temps: list[float]
"""Temperatures at which Cp was evaluated (units: K)"""

# Vibrations
vib_freqs: list[float]
"""Vibrational frequencies for our hessian (units: cm^-1)"""
vib_errors: list[float]
"""Error between each frequency and the corresponding mode in the known hessian"""
vib_mae: float
"""Mean absolute error for the vibrational modes"""


def compare_hessians(atoms: ase.Atoms, known_hessian: np.ndarray, approx_hessian: np.ndarray) -> HessianQuality:
"""Compare two different hessians for same atomic structure
Args:
atoms: Structure
known_hessian: 2D form of the target Hessian
approx_hessian: 2D form of an approximate Hessian
Returns:
Collection of the performance metrics
"""

# Start by making a vibration data object
known_vibs: VibrationsData = VibrationsData.from_2d(atoms, known_hessian)
approx_vibs: VibrationsData = VibrationsData.from_2d(atoms, approx_hessian)

# Compare the vibrational frequencies on the non-zero modes
known_freqs = known_vibs.get_frequencies()
is_real = np.isreal(known_freqs)
approx_freqs = approx_vibs.get_frequencies()
freq_error = np.subtract(approx_freqs[is_real], known_freqs[is_real])
freq_mae = np.abs(freq_error).mean()

# Compare the enthalpy and heat capacity
# TODO (wardlt): Might actually want to compute the symmetry number
known_harm = StatMech(vib_wavenumbers=np.real(known_freqs[is_real]), atoms=atoms, symmetrynumber=1, **presets['harmonic'])
approx_harm = StatMech(vib_wavenumbers=np.real(approx_freqs[is_real]), atoms=atoms, symmetrynumber=1, **presets['harmonic'])

temps = np.linspace(1., 373, 128)
known_cp = np.array([known_harm.get_Cp('kcal/mol/K', T=t) for t in temps])
approx_cp = np.array([approx_harm.get_Cp('kcal/mol/K', T=t) for t in temps])
known_h = np.array([known_harm.get_H('kcal/mol', T=t) for t in temps])
approx_h = np.array([approx_harm.get_H('kcal/mol', T=t) for t in temps])

# Assemble into a result object
return HessianQuality(
zpe=approx_vibs.get_zero_point_energy() * units.mol / units.kcal,
zpe_error=(approx_vibs.get_zero_point_energy() - known_vibs.get_zero_point_energy()) * units.mol / units.kcal,
vib_freqs=np.real(approx_freqs[is_real]).tolist(),
vib_errors=np.abs(freq_error),
vib_mae=freq_mae,
cp=approx_cp.tolist(),
cp_error=(known_cp - approx_cp).tolist(),
h=approx_h,
h_error=(known_h - approx_h).tolist(),
temps=temps.tolist()
)
95 changes: 95 additions & 0 deletions jitterbug/model/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
"""Base class defining the key methods"""
from tempfile import TemporaryDirectory
from typing import List, Optional
from random import choices
from pathlib import Path
import os

import numpy as np
from scipy import stats
from ase import Atoms
from ase.vibrations import Vibrations
from ase.calculators.calculator import Calculator


class EnergyModel:
Expand Down Expand Up @@ -35,3 +44,89 @@ def sample_hessians(self, model: object, num_samples: int) -> list[np.ndarray]:
A list of 2D hessians
"""
raise NotImplementedError()


class ASEEnergyModel(EnergyModel):
"""Energy models which produce a series of ASE :class:`~ase.calculators.calculator.Calculator` objects"""

def __init__(self, reference: Atoms, num_calculators: int, scratch_dir: Optional[Path] = None):
self.reference = reference
self.scratch_dir = scratch_dir
self.num_calculators = num_calculators

def train_calculator(self, data: List[Atoms]) -> Calculator:
"""Train one of the constituent calculators
Args:
data: Training data
Returns:
Retrained calculator
"""
raise NotImplementedError()

def train(self, data: list[Atoms]) -> List[Calculator]:
# Train each on a different bootstrap sample
output = []
for _ in range(self.num_calculators):
sampled_data = choices(data, k=len(data))
output.append(self.train_calculator(sampled_data))
return output

def compute_hessian(self, mol: Atoms, calc: Calculator) -> np.ndarray:
"""Compute the Hessian using one of the calculators
Args:
mol: Molecule to be evaluated
calc: Calculator to use
Returns:
2D Hessian matrix
"""
# Clone the molecule to avoid any cross-talk
mol = mol.copy()

with TemporaryDirectory(dir=self.scratch_dir) as td:
start_dir = Path.cwd()
try:
# Run the vibrations calculation in a temporary directory
os.chdir(td)
mol.calc = calc
vib = Vibrations(mol, name='mbtr-temp')
vib.run()

# Return only the 2D Hessian
return vib.get_vibrations().get_hessian_2d()
finally:
os.chdir(start_dir)

def mean_hessian(self, models: list[Calculator]) -> np.ndarray:
# Run all calculators
hessians = [self.compute_hessian(self.reference, calc) for calc in models]

# Return the mean
return np.mean(hessians, axis=0)

def sample_hessians(self, models: list[Calculator], num_samples: int) -> list[np.ndarray]:
# Run all calculators
hessians = [self.compute_hessian(self.reference, calc) for calc in models]

# Compute the mean and covariance for each parameter
triu_ind = np.triu_indices(hessians[0].shape[0])
hessians_flat = [h[triu_ind] for h in hessians]
hessian_mean = np.mean(hessians_flat, axis=0)
hessian_covar = np.cov(hessians_flat, rowvar=False)

# Generate samples
hessian_mvn = stats.multivariate_normal(hessian_mean, hessian_covar, allow_singular=True)
diag_indices = np.diag_indices(hessians[0].shape[0])
output = []
for sample in hessian_mvn.rvs(size=num_samples):
# Fill in a 2D version
sample_hessian = np.zeros_like(hessians[0])
sample_hessian[triu_ind] = sample

# Make it symmetric
sample_hessian += sample_hessian.T
sample_hessian[diag_indices] /= 2

output.append(sample_hessian)
return output
1 change: 1 addition & 0 deletions jitterbug/model/dscribe/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Energy models using `DScribe <https://singroup.github.io/dscribe/latest/index.html>`_"""
87 changes: 87 additions & 0 deletions jitterbug/model/dscribe/globald.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""Models which use a single vector to describe a molecule.
Such approaches, such as MBTR, present a simple "molecule->energy" learning problem.
Other methods, such as SOAP, provided atomic-level features that must require an
extra step "molecule->atoms->energy/atom->energy".
We train the models using sklearn for this example.
"""
from typing import List

from dscribe.descriptors.descriptorglobal import DescriptorGlobal
from sklearn.linear_model import LinearRegression
from sklearn.base import BaseEstimator, clone
from dscribe.descriptors import MBTR
from ase.calculators.calculator import Calculator, all_changes
from ase import Atoms
import numpy as np

from jitterbug.model.base import ASEEnergyModel


class DScribeGlobalCalculator(Calculator):
"""A learnable forcefield based on global fingerprints computed using DScribe"""

implemented_properties = ['energy', 'forces']
default_parameters = {
'descriptor': MBTR(
species=["H", "C", "N", "O"],
geometry={"function": "inverse_distance"},
grid={"min": 0, "max": 1, "n": 100, "sigma": 0.1},
weighting={"function": "exp", "scale": 0.5, "threshold": 1e-3},
periodic=False,
normalization="l2",
),
'model': LinearRegression(),
'offset': 0., # Normalizing parameters
'scale': 1.
}

def calculate(self, atoms=None, properties=('energy', 'forces'), system_changes=all_changes):
# Compute the energy using the learned model
desc = self.parameters['descriptor'].create_single(atoms)
energy_unscaled = self.parameters['model'].predict(desc[None, :])
self.results['energy'] = energy_unscaled[0] * self.parameters['scale'] + self.parameters['offset']

# If desired, compute forces numerically
if 'forces' in properties:
# calculate_numerical_forces use that the calculation of the input atoms,
# even though it is a method of a calculator and not of the input atoms :shrug:
temp_atoms: Atoms = atoms.copy()
temp_atoms.calc = self
self.results['forces'] = self.calculate_numerical_forces(temp_atoms)


class DScribeGlobalEnergyModel(ASEEnergyModel):
"""Compute energy using a scikit-learn model that computes energies from global descriptors
Args:
reference: Reference structure at which we compute the Hessian
descriptors: Tool used to compute descriptors
model: Scikit-learn model to use to compute energies given descriptors
num_calculators: Number of models to use in ensemble
"""

def __init__(self, reference: Atoms, descriptors: DescriptorGlobal, model: BaseEstimator, num_calculators: int):
super().__init__(reference, num_calculators)
self.desc = descriptors
self.model = model

def train_calculator(self, data: List[Atoms]) -> Calculator:
# Make a copy of the model
model: BaseEstimator = clone(self.model)

# Compute the descriptors then train
train_x = self.desc.create(data)
train_y = np.array([a.get_potential_energy() for a in data])
scale_y, offset_y = np.std(train_y), np.mean(train_y)
train_y = (train_y - offset_y) / scale_y
model.fit(train_x, train_y)

# Assemble into a Calculator
return DScribeGlobalCalculator(
descriptor=self.desc,
model=model,
offset=offset_y,
scale=scale_y,
)
Loading

0 comments on commit 9e82cc6

Please sign in to comment.