Skip to content

Commit

Permalink
Add computing Hessians using MBTR forcefield (#8)
Browse files Browse the repository at this point in the history
* Add a ASE calculator for MBTR

* Move back to Py3.9 for dscribe

I get an error "from collections import Iterable" stemming
from the sparse module used by dscribe

* Add force computation

* Add ability to compute Hessian from MBTR

* Remove unused import

* Add an example notebook with MBTR

* Adjust the scale to be a unit normal

Should reduce the likelihood of numerical issues

* Update proof-of-concept with more data, better KRR
  • Loading branch information
WardLT authored Sep 13, 2023
1 parent 93f77b3 commit b7d6b6a
Show file tree
Hide file tree
Showing 8 changed files with 1,001 additions and 52 deletions.
5 changes: 4 additions & 1 deletion envs/environment-cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ channels:
- pytorch
- conda-forge/label/libint_dev
dependencies:
- python==3.10.*
- python==3.9.*

# Standard data analysis tools
- pandas==1.*
Expand All @@ -20,6 +20,9 @@ dependencies:
# Quantum chemistry
- psi4==1.8.*

# Interatomic forcefields
- dscribe==2.1.*

# Use Conda PyTorch to avoid OpenMP disagreement with other libraries
- pytorch==2.0.*
- cpuonly
Expand Down
6 changes: 4 additions & 2 deletions jitterbug/model/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ def train(self, data: list[Atoms]) -> object:
"""
raise NotImplementedError()

def mean_hessian(self, model: object) -> list[np.ndarray]:
def mean_hessian(self, model: object) -> np.ndarray:
"""Produce the most-likely Hessian given the model
Args:
model: Model trained by this
model: Model trained by this class
Returns:
The most-likely Hessian given the model
"""

def sample_hessians(self, model: object, num_samples: int) -> list[np.ndarray]:
Expand Down
96 changes: 96 additions & 0 deletions jitterbug/model/mbtr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""Learn a potential energy surface with the MBTR representation
MBTR is an easy route for learning a forcefield because it represents
a molecule as a single vector, which means we can case the learning
problem as a simple "molecule->energy" learning problem. Other methods,
such as SOAP, provided atomic-level features that must require an
extra step "molecule->atoms->energy/atom->energy".
"""
from shutil import rmtree

from ase.calculators.calculator import Calculator, all_changes
from ase.vibrations import Vibrations
from ase import Atoms
from sklearn.linear_model import LinearRegression
from dscribe.descriptors import MBTR
import numpy as np

from jitterbug.model.base import EnergyModel


class MBTRCalculator(Calculator):
"""A learnable forcefield based on GPR and fingerprints computed using DScribe"""

implemented_properties = ['energy', 'forces']
default_parameters = {
'descriptor': MBTR(
species=["H", "C", "N", "O"],
geometry={"function": "inverse_distance"},
grid={"min": 0, "max": 1, "n": 100, "sigma": 0.1},
weighting={"function": "exp", "scale": 0.5, "threshold": 1e-3},
periodic=False,
normalization="l2",
),
'model': LinearRegression(),
'intercept': 0., # Normalizing parameters
'scale': 0.
}

def calculate(self, atoms=None, properties=('energy', 'forces'), system_changes=all_changes):
# Compute the energy using the learned model
desc = self.parameters['descriptor'].create_single(atoms)
energy_no_int = self.parameters['model'].predict(desc[None, :])
self.results['energy'] = energy_no_int[0] * self.parameters['scale'] + self.parameters['intercept']

# If desired, compute forces numerically
if 'forces' in properties:
# calculate_numerical_forces use that the calculation of the input atoms,
# even though it is a method of a calculator and not of the input atoms :shrug:
temp_atoms: Atoms = atoms.copy()
temp_atoms.calc = self
self.results['forces'] = self.calculate_numerical_forces(temp_atoms)

def train(self, train_set: list[Atoms]):
"""Train the embedded forcefield object
Args:
train_set: List of Atoms objects containing at least the energy
"""

# Determine the mean energy and subtract it off
energies = np.array([atoms.get_potential_energy() for atoms in train_set])
self.parameters['intercept'] = energies.mean()
energies -= self.parameters['intercept']
self.parameters['scale'] = energies.std()
energies /= self.parameters['scale']

# Compute the descriptors and use them to fit the model
desc = self.parameters['descriptor'].create(train_set)
self.parameters['model'].fit(desc, energies)


class MBTREnergyModel(EnergyModel):
"""Use the MBTR representation to model the potential energy surface
Args:
calc: Calculator used to fit the potential energy surface
reference: Reference structure at which we compute the Hessian
"""

def __init__(self, calc: MBTRCalculator, reference: Atoms):
super().__init__()
self.calc = calc
self.reference = reference

def train(self, data: list[Atoms]) -> MBTRCalculator:
self.calc.train(data)
return self.calc

def mean_hessian(self, model: MBTRCalculator) -> np.ndarray:
self.reference.calc = model
try:
vib = Vibrations(self.reference, name='mbtr-temp')
vib.run()
return vib.get_vibrations().get_hessian_2d()
finally:
rmtree('mbtr-temp', ignore_errors=True)
95 changes: 78 additions & 17 deletions notebooks/proof-of-concept/1_compute-random-offsets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "c6a28419-6831-4197-8973-00c5591e19cb",
"metadata": {
"tags": []
Expand All @@ -38,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "c6be56c5-a460-4acd-9b89-8c3d9c812f5f",
"metadata": {
"tags": [
Expand All @@ -51,7 +51,7 @@
"method = 'hf'\n",
"basis = 'def2-svpd'\n",
"threads = min(os.cpu_count(), 12)\n",
"step_size: float = 0.005 # Perturbation amount, used as maximum L2 norm"
"step_size: float = 0.01 # Perturbation amount, used as maximum L2 norm"
]
},
{
Expand All @@ -64,7 +64,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "0b6794cd-477f-45a1-b96f-2332804ddb20",
"metadata": {},
"outputs": [],
Expand All @@ -83,12 +83,23 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "ad9fd725-b1ba-4fec-ae41-959be0e540b3",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"Atoms(symbols='O2N4C8H10', pbc=False, forces=..., calculator=SinglePointCalculator(...))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"atoms = read(Path('data') / 'exact' / f'{run_name}.xyz')\n",
"atoms"
Expand All @@ -113,7 +124,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "23502eea-0974-4248-8f19-e85447069c61",
"metadata": {
"tags": []
Expand All @@ -126,7 +137,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "bf1366fc-d9a7-4a98-b9c9-cb3a0209b406",
"metadata": {
"tags": []
Expand All @@ -146,7 +157,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "d4f21e81-5ec3-4877-a4d1-402077be2ee8",
"metadata": {
"tags": []
Expand All @@ -168,12 +179,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"id": "0915595d-133a-43df-84fc-4ff6a3b538ea",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" Memory set to 3.815 GiB by Python driver.\n",
" Threads set to 12 by Python driver.\n"
]
}
],
"source": [
"calc = Psi4(method=method, basis=basis, num_threads=threads, memory='4096MB')"
]
Expand All @@ -188,12 +209,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"id": "e2a28593-2634-4bb7-ae5b-8f557937bda1",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Need to run 2701 calculations for full accuracy.\n"
]
}
],
"source": [
"n_atoms = len(atoms)\n",
"to_compute = 3 * n_atoms + 3 * n_atoms * (3 * n_atoms + 1) // 2 + 1\n",
Expand All @@ -202,12 +231,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"id": "8bf40523-dcaa-4046-a9c6-74e35178e87f",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Already done 427. 2274 left to do.\n"
]
}
],
"source": [
"with connect(db_path) as db:\n",
" done = len(db)\n",
Expand All @@ -219,7 +256,31 @@
"execution_count": null,
"id": "a6fa1b33-defc-4b35-895d-052eb64453fb",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/2701 [00:00<?, ?it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" Memory set to 3.815 GiB by Python driver.\n",
" Threads set to 12 by Python driver.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 59%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 1600/2701 [2:06:01<2:33:24, 8.36s/it]"
]
}
],
"source": [
"pbar = tqdm(total=to_compute)\n",
"pbar.update(done)\n",
Expand Down Expand Up @@ -271,7 +332,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.9.17"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit b7d6b6a

Please sign in to comment.