Skip to content

Commit

Permalink
factor out + test trajectory
Browse files Browse the repository at this point in the history
  • Loading branch information
miketynes committed Dec 3, 2024
1 parent cb9f07c commit 8e3b72f
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 188 deletions.
245 changes: 57 additions & 188 deletions 3_cascade/1_prototype-cascade.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@
"No science is done here. "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "103a28c9-a84c-42fd-b87d-d75635b76705",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 1,
Expand Down Expand Up @@ -47,7 +55,7 @@
"import numpy as np\n",
"from mace.calculators import mace_mp\n",
"\n",
"\n",
"from cascade.trajectory import CascadeTrajectory\n",
"from cascade.utils import canonicalize, apply_calculator\n",
"from cascade.auditor import RandomAuditor\n",
"from cascade.learning.torchani import TorchANI\n",
Expand Down Expand Up @@ -96,19 +104,23 @@
"output_type": "stream",
"text": [
"Using Materials Project MACE for MACECalculator with /home/mike/.cache/mace/20231210mace128L0_energy_epoch249model\n",
"Using float32 for MACECalculator, which is faster but less accurate. Recommended for MD. Use float64 for geometry optimization.\n",
"Default dtype float32 does not match model dtype float64, converting models to float32.\n"
"Using float32 for MACECalculator, which is faster but less accurate. Recommended for MD. Use float64 for geometry optimization.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/mike/miniconda3/envs/cascade/lib/python3.11/site-packages/torch/cuda/__init__.py:128: UserWarning: CUDA initialization: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero. (Triggered internally at ../c10/cuda/CUDAFunctions.cpp:108.)\n",
" return torch._C._cuda_getDeviceCount() > 0\n",
"/home/mike/miniconda3/envs/cascade/lib/python3.11/site-packages/mace/calculators/mace.py:128: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
" torch.load(f=model_path, map_location=device)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Default dtype float32 does not match model dtype float64, converting models to float32.\n"
]
}
],
"source": [
Expand Down Expand Up @@ -150,207 +162,56 @@
]
},
{
"cell_type": "markdown",
"id": "8623b624-6d03-42b6-bf76-2fd17043a8ac",
"metadata": {},
"source": [
"## Class for trajectories"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e62cbd7c-4a4e-46a9-8d70-b986a95ac792",
"cell_type": "raw",
"id": "18029a5c-a96a-44ad-ab72-5668f20a57c5",
"metadata": {},
"outputs": [],
"source": [
"class CascadeTrajectory:\n",
" \"\"\"A class to encasplulate a cascade trajectory\n",
"\n",
" This is useful for reading and auditing trajectories\n",
" so we know where to start sampling from (e.g., after the last trusted timestep)\n",
" \"\"\"\n",
"\n",
"\n",
" def __init__(self, \n",
" path: str, \n",
" starting: ase.Atoms = None):\n",
" self.path = path\n",
" self.starting = starting\n",
" \n",
" if self.starting is not None:\n",
" write(self.path, self.starting)\n",
" else:\n",
" self.starting = read(self.path)\n",
" \n",
" self.current = starting\n",
" self.current_timestep = 0\n",
" self.last_trusted_timestep = 0\n",
" \n",
" def read(self, index=':', *args, **kwargs) -> list[ase.Atoms]:\n",
" \"\"\"Read the trajectory into an iterable of atoms\"\"\"\n",
" return read(self.path, *args, index=index, **kwargs)\n",
"\n",
" def get_untrusted_segment(self) -> list[ase.Atoms]:\n",
" \"\"\"Return the part of the trajectory that needs to be audited\"\"\"\n",
" return read(self.path, index=f'{self.last_trusted_timestep+1}:')\n",
" \n",
" def trim_untrusted_segment(self):\n",
" \"\"\"Remove the part of a trajectory that failed an audit, updating timesteps as appropriate\"\"\"\n",
" # todo: is there a way to do this without loading into memory?\n",
" write(self.path, read(self.path, index=f':{self.last_trusted_timestep+1}'))\n",
" self.current_timestep = self.last_trusted_timestep\n",
"\n",
" def __repr__(self): \n",
" return f\"CascadeTrajectory(path={self.path}, current_timestep={self.current_timestep}, last_trusted_timestep={self.last_trusted_timestep})\"\n",
" "
]
"source": []
},
{
"cell_type": "markdown",
"id": "fd1f1599-f541-4d40-affb-8368b3292afb",
"metadata": {},
"source": [
"### tests \n",
"\n",
"#### Todo: (these should go in a test suite if we're keeping this), update the coords or something to make sure the right things are getting deleted"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "1b225651-b0ee-46be-ac8d-8b55d8ea4c83",
"id": "fffbbce3-a5d2-4f14-9095-261309a069a3",
"metadata": {},
"outputs": [],
"source": [
"write('test.traj', [atoms, atoms.copy()])"
"## Minimum viable cascasde loop"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "766de0f0-2df7-4dc2-813a-06e414047c97",
"execution_count": null,
"id": "b325e1fd-2737-4650-8808-e1fb2b5b2810",
"metadata": {},
"outputs": [],
"source": [
"traj = CascadeTrajectory('test.traj')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "10c2c251-dcc8-4e1d-b8fc-f4e51fd9f552",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Atoms(symbols='Si63', pbc=True, cell=[10.86, 10.86, 10.86]),\n",
" Atoms(symbols='Si63', pbc=True, cell=[10.86, 10.86, 10.86])]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"traj.read()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f0e8a83c-8c95-44f3-ab42-02806c1ed390",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Atoms(symbols='Si63', pbc=True, cell=[10.86, 10.86, 10.86])]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"traj.get_untrusted_segment()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "60a186dd-037b-4395-8284-5933d051c3b4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Atoms(symbols='Si63', pbc=True, cell=[10.86, 10.86, 10.86])]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"read('test.traj', index=':1')"
"class CascadeThinker:\n",
"\n",
" def __init__(self,\n",
" total_steps: int,\n",
" increment_steps: int,\n",
" auditor: "
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "004ff4db-6f18-43dc-a441-e09fb570f7a1",
"execution_count": null,
"id": "a7857c24-b2a0-4ccd-b989-ed88844bec68",
"metadata": {},
"outputs": [],
"source": [
"traj.trim_untrusted_segment()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "c17f0a18-f39d-4e52-8db3-dfe18b8f7423",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Atoms(symbols='Si63', pbc=True, cell=[10.86, 10.86, 10.86])]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"traj.read()"
]
},
{
"cell_type": "raw",
"id": "18029a5c-a96a-44ad-ab72-5668f20a57c5",
"metadata": {},
"source": []
},
{
"cell_type": "markdown",
"id": "fffbbce3-a5d2-4f14-9095-261309a069a3",
"metadata": {},
"source": [
"## Minimum viable cascasde loop"
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 6,
"id": "568bdb51-d2a5-4ff5-9b50-13d4bc111e5f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"On traj 1/2\n",
"Running ML-driven dynamics\n"
]
},
{
"name": "stderr",
"output_type": "stream",
Expand All @@ -363,8 +224,6 @@
"name": "stdout",
"output_type": "stream",
"text": [
"On traj 1/2\n",
"Running ML-driven dynamics\n",
"CascadeTrajectory(path=si-diffusion-seed=0.traj, current_timestep=64, last_trusted_timestep=0)\n",
"On traj 2/2\n",
"Running ML-driven dynamics\n",
Expand Down Expand Up @@ -470,23 +329,26 @@
" if score > threshold: \n",
" print(f'score > threshold ({score} > {threshold}), running audit calculations and dropping untrusted segment')\n",
" segment = apply_calculator(calc, segment)\n",
" # todo: add to training set\n",
" traj.trim_untrusted_segment()\n",
" else:\n",
" print(f'score < threshold ({score} < {threshold}, marking recent segment as trusted')\n",
" traj.last_trusted_timestep = traj.current_timestep\n",
"\n",
" traj.trusted = traj.current\n",
" \n",
" # otherwise we can run the ML-driven dynamics \n",
" else:\n",
" # then we run dynamics\n",
" print('Running ML-driven dynamics')\n",
" traj.current.calc = learner.make_calculator(model, device='cpu')\n",
" dyn = VelocityVerlet(atoms=traj.current,\n",
" atoms = traj.trusted.copy()\n",
" atoms.calc = learner.make_calculator(model, device='cpu')\n",
" dyn = VelocityVerlet(atoms=atoms,\n",
" timestep=1*units.fs,\n",
" trajectory=TrajectoryWriter(traj.path, mode='a')\n",
" )\n",
" dyn.run(increment_steps)\n",
" traj.current_timestep += increment_steps\n",
" traj.current = atoms\n",
" print(traj)\n",
" \n",
" i += 1\n",
Expand All @@ -499,12 +361,19 @@
"id": "ea864495-142f-4640-b1c6-eaa341dbf581",
"metadata": {},
"source": [
"## did those complete? "
"## did those complete? \n",
"\n",
"This is great, next steps: \n",
"- [ ] diagram out current/trusted logic\n",
"- [ ] add training\n",
"- [ ] break into functions/classes WIP\n",
"- [ ] add tests WIP\n",
"- [ ] add logging"
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 7,
"id": "07448032-737c-49ac-9958-2829b95b841e",
"metadata": {},
"outputs": [
Expand All @@ -514,7 +383,7 @@
"[129, 129]"
]
},
"execution_count": 15,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
40 changes: 40 additions & 0 deletions cascade/trajectory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import ase
from ase.io import read, write


class CascadeTrajectory:
"""A class to encasplulate a cascade trajectory
This is useful for reading and auditing trajectories
so we know where to start sampling from (e.g., after the last trusted timestep)
"""
def __init__(self,
path: str,
starting: ase.Atoms = None):
self.path = path
self.starting = starting
if self.starting is not None:
write(self.path, self.starting)
else:
self.starting = read(self.path)

self.current = self.trusted = starting
self.current_timestep = 0
self.last_trusted_timestep = 0

def read(self, index=':', *args, **kwargs) -> list[ase.Atoms]:
"""Read the trajectory into an iterable of atoms"""
return read(self.path, *args, index=index, **kwargs)

def get_untrusted_segment(self) -> list[ase.Atoms]:
"""Return the part of the trajectory that needs to be audited"""
return read(self.path, index=f'{self.last_trusted_timestep+1}:')

def trim_untrusted_segment(self):
"""Remove the part of a trajectory that failed an audit, updating timesteps as appropriate"""
# todo: is there a way to do this without loading into memory?
write(self.path, read(self.path, index=f':{self.last_trusted_timestep+1}'))
self.current_timestep = self.last_trusted_timestep

def __repr__(self):
return f"CascadeTrajectory(path={self.path}, current_timestep={self.current_timestep}, last_trusted_timestep={self.last_trusted_timestep})"
Loading

0 comments on commit 8e3b72f

Please sign in to comment.