Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add computing Hessians using MBTR forcefield #8

Merged
merged 8 commits into from
Sep 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion envs/environment-cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ channels:
- pytorch
- conda-forge/label/libint_dev
dependencies:
- python==3.10.*
- python==3.9.*

# Standard data analysis tools
- pandas==1.*
Expand All @@ -20,6 +20,9 @@ dependencies:
# Quantum chemistry
- psi4==1.8.*

# Interatomic forcefields
- dscribe==2.1.*

# Use Conda PyTorch to avoid OpenMP disagreement with other libraries
- pytorch==2.0.*
- cpuonly
Expand Down
6 changes: 4 additions & 2 deletions jitterbug/model/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ def train(self, data: list[Atoms]) -> object:
"""
raise NotImplementedError()

def mean_hessian(self, model: object) -> list[np.ndarray]:
def mean_hessian(self, model: object) -> np.ndarray:
"""Produce the most-likely Hessian given the model

Args:
model: Model trained by this
model: Model trained by this class
Returns:
The most-likely Hessian given the model
"""

def sample_hessians(self, model: object, num_samples: int) -> list[np.ndarray]:
Expand Down
96 changes: 96 additions & 0 deletions jitterbug/model/mbtr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""Learn a potential energy surface with the MBTR representation

MBTR is an easy route for learning a forcefield because it represents
a molecule as a single vector, which means we can case the learning
problem as a simple "molecule->energy" learning problem. Other methods,
such as SOAP, provided atomic-level features that must require an
extra step "molecule->atoms->energy/atom->energy".
"""
from shutil import rmtree

from ase.calculators.calculator import Calculator, all_changes
from ase.vibrations import Vibrations
from ase import Atoms
from sklearn.linear_model import LinearRegression
from dscribe.descriptors import MBTR
import numpy as np

from jitterbug.model.base import EnergyModel


class MBTRCalculator(Calculator):
"""A learnable forcefield based on GPR and fingerprints computed using DScribe"""

implemented_properties = ['energy', 'forces']
default_parameters = {
'descriptor': MBTR(
species=["H", "C", "N", "O"],
geometry={"function": "inverse_distance"},
grid={"min": 0, "max": 1, "n": 100, "sigma": 0.1},
weighting={"function": "exp", "scale": 0.5, "threshold": 1e-3},
periodic=False,
normalization="l2",
),
'model': LinearRegression(),
'intercept': 0., # Normalizing parameters
'scale': 0.
}

def calculate(self, atoms=None, properties=('energy', 'forces'), system_changes=all_changes):
# Compute the energy using the learned model
desc = self.parameters['descriptor'].create_single(atoms)
energy_no_int = self.parameters['model'].predict(desc[None, :])
self.results['energy'] = energy_no_int[0] * self.parameters['scale'] + self.parameters['intercept']

# If desired, compute forces numerically
if 'forces' in properties:
# calculate_numerical_forces use that the calculation of the input atoms,
# even though it is a method of a calculator and not of the input atoms :shrug:
temp_atoms: Atoms = atoms.copy()
temp_atoms.calc = self
self.results['forces'] = self.calculate_numerical_forces(temp_atoms)

def train(self, train_set: list[Atoms]):
"""Train the embedded forcefield object

Args:
train_set: List of Atoms objects containing at least the energy
"""

# Determine the mean energy and subtract it off
energies = np.array([atoms.get_potential_energy() for atoms in train_set])
self.parameters['intercept'] = energies.mean()
energies -= self.parameters['intercept']
self.parameters['scale'] = energies.std()
energies /= self.parameters['scale']

# Compute the descriptors and use them to fit the model
desc = self.parameters['descriptor'].create(train_set)
self.parameters['model'].fit(desc, energies)


class MBTREnergyModel(EnergyModel):
"""Use the MBTR representation to model the potential energy surface

Args:
calc: Calculator used to fit the potential energy surface
reference: Reference structure at which we compute the Hessian
"""

def __init__(self, calc: MBTRCalculator, reference: Atoms):
super().__init__()
self.calc = calc
self.reference = reference

def train(self, data: list[Atoms]) -> MBTRCalculator:
self.calc.train(data)
return self.calc

def mean_hessian(self, model: MBTRCalculator) -> np.ndarray:
self.reference.calc = model
try:
vib = Vibrations(self.reference, name='mbtr-temp')
vib.run()
return vib.get_vibrations().get_hessian_2d()
finally:
rmtree('mbtr-temp', ignore_errors=True)
95 changes: 78 additions & 17 deletions notebooks/proof-of-concept/1_compute-random-offsets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "c6a28419-6831-4197-8973-00c5591e19cb",
"metadata": {
"tags": []
Expand All @@ -38,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "c6be56c5-a460-4acd-9b89-8c3d9c812f5f",
"metadata": {
"tags": [
Expand All @@ -51,7 +51,7 @@
"method = 'hf'\n",
"basis = 'def2-svpd'\n",
"threads = min(os.cpu_count(), 12)\n",
"step_size: float = 0.005 # Perturbation amount, used as maximum L2 norm"
"step_size: float = 0.01 # Perturbation amount, used as maximum L2 norm"
]
},
{
Expand All @@ -64,7 +64,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "0b6794cd-477f-45a1-b96f-2332804ddb20",
"metadata": {},
"outputs": [],
Expand All @@ -83,12 +83,23 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "ad9fd725-b1ba-4fec-ae41-959be0e540b3",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"Atoms(symbols='O2N4C8H10', pbc=False, forces=..., calculator=SinglePointCalculator(...))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"atoms = read(Path('data') / 'exact' / f'{run_name}.xyz')\n",
"atoms"
Expand All @@ -113,7 +124,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "23502eea-0974-4248-8f19-e85447069c61",
"metadata": {
"tags": []
Expand All @@ -126,7 +137,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "bf1366fc-d9a7-4a98-b9c9-cb3a0209b406",
"metadata": {
"tags": []
Expand All @@ -146,7 +157,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "d4f21e81-5ec3-4877-a4d1-402077be2ee8",
"metadata": {
"tags": []
Expand All @@ -168,12 +179,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"id": "0915595d-133a-43df-84fc-4ff6a3b538ea",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" Memory set to 3.815 GiB by Python driver.\n",
" Threads set to 12 by Python driver.\n"
]
}
],
"source": [
"calc = Psi4(method=method, basis=basis, num_threads=threads, memory='4096MB')"
]
Expand All @@ -188,12 +209,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"id": "e2a28593-2634-4bb7-ae5b-8f557937bda1",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Need to run 2701 calculations for full accuracy.\n"
]
}
],
"source": [
"n_atoms = len(atoms)\n",
"to_compute = 3 * n_atoms + 3 * n_atoms * (3 * n_atoms + 1) // 2 + 1\n",
Expand All @@ -202,12 +231,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"id": "8bf40523-dcaa-4046-a9c6-74e35178e87f",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Already done 427. 2274 left to do.\n"
]
}
],
"source": [
"with connect(db_path) as db:\n",
" done = len(db)\n",
Expand All @@ -219,7 +256,31 @@
"execution_count": null,
"id": "a6fa1b33-defc-4b35-895d-052eb64453fb",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/2701 [00:00<?, ?it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" Memory set to 3.815 GiB by Python driver.\n",
" Threads set to 12 by Python driver.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 59%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 1600/2701 [2:06:01<2:33:24, 8.36s/it]"
]
}
],
"source": [
"pbar = tqdm(total=to_compute)\n",
"pbar.update(done)\n",
Expand Down Expand Up @@ -271,7 +332,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.9.17"
}
},
"nbformat": 4,
Expand Down
Loading
Loading