diff --git a/notebooks/0_create-test-set/0_get-exact-answer.ipynb b/notebooks/0_create-test-set/0_get-exact-answer.ipynb
index f234bda..33021fa 100644
--- a/notebooks/0_create-test-set/0_get-exact-answer.ipynb
+++ b/notebooks/0_create-test-set/0_get-exact-answer.ipynb
@@ -23,7 +23,7 @@
"from ase.vibrations import VibrationsData, Vibrations\n",
"from ase.calculators.mopac import MOPAC\n",
"from ase.calculators.psi4 import Psi4\n",
- "from ase.optimize import QuasiNewton\n",
+ "from ase.optimize import BFGS\n",
"from ase import Atoms, units\n",
"from ase.io import write, read\n",
"from jitterbug.utils import make_calculator\n",
@@ -58,7 +58,8 @@
"outputs": [],
"source": [
"molecule_name = 'caffeine'\n",
- "method = 'pm7'\n",
+ "relax_method = 'pm7/None' # Method and basis used to relax the geometry, written as 'method/basis'\n",
+ "hess_method = None # Method and basis used for the Hessian computation, written as 'method/basis'. None to reuse relax_method\n",
"basis = None # Set to None for MOPAC methods\n",
"threads = min(os.cpu_count(), 12)\n",
"delta = 0.01"
@@ -72,18 +73,37 @@
"Derived"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2f543fd2-ca4c-4d68-a523-14515f351c4b",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "relax_method, relax_basis = relax_method.split(\"/\")\n",
+ "if hess_method is None:\n",
+ " hess_method, hess_basis = relax_method, relax_basis\n",
+ "else:\n",
+ " hess_method, hess_basis = hess_method.split(\"/\")"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"id": "aeebbc77-70e4-4709-90a0-b9aaf54d4cd9",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "run_name = f'{molecule_name}_{method}_{basis}'\n",
+ "run_name = f'{molecule_name}_{hess_method}_{hess_basis}_at_{relax_method}_{relax_basis}'\n",
"run_name_with_delta = f'{run_name}_d={delta:.3g}'\n",
"out_dir = Path('data') / 'exact'\n",
"if (out_dir / f'{run_name_with_delta}-times.json').exists():\n",
- " raise ValueError('Already done!')"
+ " raise ValueError('Already done!')\n",
+ "print(f'Run name: {run_name_with_delta}')"
]
},
{
@@ -160,7 +180,7 @@
},
"outputs": [],
"source": [
- "calc = make_calculator(method, basis, num_threads=threads)"
+ "calc = make_calculator(relax_method, relax_basis, num_threads=threads)"
]
},
{
@@ -180,14 +200,17 @@
},
"outputs": [],
"source": [
- "geom_path = out_dir / f'{run_name}.xyz'"
+ "geom_path = out_dir / f'{molecule_name}_{relax_method}_{relax_basis}.xyz'\n",
+ "print(f'Geometry path: {geom_path}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef903a43-5d6c-47fb-a500-837599c95f91",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"%%time\n",
@@ -196,7 +219,7 @@
" atoms.calc = calc\n",
"else:\n",
" atoms.calc = calc\n",
- " dyn = QuasiNewton(atoms)\n",
+ " dyn = BFGS(atoms)\n",
" with redirect_stderr(devnull):\n",
" dyn.run(fmax=0.01)"
]
@@ -242,6 +265,35 @@
"ASE has a built-in method which uses finite displacements"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "0e70265b-cefd-4d3c-925b-8b2cf13419e4",
+ "metadata": {},
+ "source": [
+ "Make the calculator for the hessian"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a144434b-e478-42e0-a2bd-5c43beab31d0",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "calc = make_calculator(hess_method, hess_basis, num_threads=threads)\n",
+ "atoms.calc = calc"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "71949047-df5f-47a5-883f-c329b7ca12bf",
+ "metadata": {},
+ "source": [
+ "Perform the computation"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -332,11 +384,11 @@
"outputs": [],
"source": [
"psi4_path = out_dir / f'{run_name}-psi4.json'\n",
- "if isinstance(calc, Psi4) and \"cc\" not in method and not psi4_path.exists():\n",
+ "if isinstance(calc, Psi4) and \"cc\" not in hess_method and not psi4_path.exists():\n",
" # Compute\n",
" analytic_time = perf_counter()\n",
" calc.set_psi4(atoms)\n",
- " hess = calc.psi4.hessian(f'{method}/{basis}')\n",
+ " hess = calc.psi4.hessian(f'{hess_method}/{hess_basis}')\n",
" analytic_time = perf_counter() - analytic_time\n",
"\n",
" # Convert to ASE format\n",
diff --git a/notebooks/0_create-test-set/1_compare-step-sizes-and-methods.ipynb b/notebooks/0_create-test-set/1_compare-step-sizes-and-methods.ipynb
index f1ed31c..bce4875 100644
--- a/notebooks/0_create-test-set/1_compare-step-sizes-and-methods.ipynb
+++ b/notebooks/0_create-test-set/1_compare-step-sizes-and-methods.ipynb
@@ -52,7 +52,8 @@
},
"outputs": [],
"source": [
- "molecule_name = 'water' # Which water molecule to evaluate\n",
+ "molecule_name = 'water' # Which molecule to evaluate\n",
+ "relax_level = 'b3lyp_cc-pvtz' # Method and basis used to relax the geometry, written as 'method_basis' as it appears after '_at_' in the file names\n",
"target_method = ('ccsd(t)', 'cc-pvtz')"
]
},
@@ -74,7 +75,7 @@
},
"outputs": [],
"source": [
- "hessian_paths = list(Path('data/exact/').glob(f'{molecule_name}_*-ase.json'))\n",
+ "hessian_paths = list(Path('data/exact/').glob(f'{molecule_name}_*_at_{relax_level}_d=*-ase.json'))\n",
"print(f'Found {len(hessian_paths)} hessians for {molecule_name}')"
]
},
@@ -87,7 +88,7 @@
},
"outputs": [],
"source": [
- "exact_path = Path(f'data/exact/{molecule_name}_{\"_\".join(target_method)}_d=0.005-ase.json')\n",
+ "exact_path = Path(f'data/exact/{molecule_name}_{\"_\".join(target_method)}_at_{relax_level}_d=0.005-ase.json')\n",
"assert exact_path.exists(), f'Missing reference calculation: {exact_path}'\n",
"exact_hess = VibrationsData.read(exact_path)"
]
@@ -105,7 +106,7 @@
" \"\"\"Load the Hessian and parse the metadata from the filename\n",
" \n",
" Args:\n",
- " path: Path to the Hessia\n",
+ " path: Path to the Hessian\n",
" Returns:\n",
" Dictionary the includes the metadata for the calculation and errors wrt true Hessian\n",
" \"\"\"\n",
@@ -113,7 +114,7 @@
" # Get some of the basic information\n",
" method_name, delta = path.name[:-9].rsplit(\"_d=\", 1)\n",
" delta = float(delta)\n",
- " _, method, basis = method_name.split(\"_\")\n",
+ " _, method, basis = method_name.split(\"_\")[:3]\n",
" \n",
" # Compare to reference\n",
" approx_hess = VibrationsData.read(path)\n",
diff --git a/notebooks/0_create-test-set/2_evaluate-effect-of-rotations.ipynb b/notebooks/0_create-test-set/2_evaluate-effect-of-rotations.ipynb
index 41a3aeb..f67c692 100644
--- a/notebooks/0_create-test-set/2_evaluate-effect-of-rotations.ipynb
+++ b/notebooks/0_create-test-set/2_evaluate-effect-of-rotations.ipynb
@@ -52,7 +52,7 @@
},
"outputs": [],
"source": [
- "molecule_path = 'data/exact/water_pm7_None.xyz'\n",
+ "molecule_path = 'data/exact/caffeine_pm7_None.xyz'\n",
"num_samples: int = 256"
]
},
@@ -152,8 +152,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 22.3 ms, sys: 83.7 ms, total: 106 ms\n",
- "Wall time: 492 ms\n"
+ "CPU times: user 151 ms, sys: 696 ms, total: 847 ms\n",
+ "Wall time: 5.57 s\n"
]
}
],
@@ -185,7 +185,7 @@
"text": [
" 0%| | 0/256 [00:00, ?it/s]/home/lward/miniconda3/envs/jitterbug/lib/python3.9/site-packages/pmutt/statmech/vib.py:87: RuntimeWarning: overflow encountered in sinh\n",
" (0.5 * vib_dimless)**2 * (1. / np.sinh(vib_dimless / 2.))**2\n",
- "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 256/256 [02:16<00:00, 1.87it/s]\n"
+ "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 256/256 [30:09<00:00, 7.07s/it]\n"
]
}
],
@@ -258,38 +258,38 @@
" \n",
"
\n",
" mean | \n",
- " 0.015256 | \n",
- " 2.120676 | \n",
+ " 0.008504 | \n",
+ " 0.124474 | \n",
"
\n",
" \n",
" std | \n",
- " 0.011612 | \n",
- " 0.997429 | \n",
+ " 0.004583 | \n",
+ " 0.045956 | \n",
"
\n",
" \n",
" min | \n",
- " 0.000040 | \n",
- " 0.273227 | \n",
+ " 0.000007 | \n",
+ " 0.016011 | \n",
"
\n",
" \n",
" 25% | \n",
- " 0.006243 | \n",
- " 1.387332 | \n",
+ " 0.004681 | \n",
+ " 0.088121 | \n",
"
\n",
" \n",
" 50% | \n",
- " 0.012741 | \n",
- " 1.964503 | \n",
+ " 0.008094 | \n",
+ " 0.124565 | \n",
"
\n",
" \n",
" 75% | \n",
- " 0.022034 | \n",
- " 2.795209 | \n",
+ " 0.013257 | \n",
+ " 0.168693 | \n",
"
\n",
" \n",
" max | \n",
- " 0.044429 | \n",
- " 4.493064 | \n",
+ " 0.015370 | \n",
+ " 0.219113 | \n",
"
\n",
" \n",
"\n",
@@ -298,13 +298,13 @@
"text/plain": [
" zpe_error vib_mae\n",
"count 256.000000 256.000000\n",
- "mean 0.015256 2.120676\n",
- "std 0.011612 0.997429\n",
- "min 0.000040 0.273227\n",
- "25% 0.006243 1.387332\n",
- "50% 0.012741 1.964503\n",
- "75% 0.022034 2.795209\n",
- "max 0.044429 4.493064"
+ "mean 0.008504 0.124474\n",
+ "std 0.004583 0.045956\n",
+ "min 0.000007 0.016011\n",
+ "25% 0.004681 0.088121\n",
+ "50% 0.008094 0.124565\n",
+ "75% 0.013257 0.168693\n",
+ "max 0.015370 0.219113"
]
},
"execution_count": 9,
@@ -319,7 +319,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 10,
"id": "3e4ffe3f-b8c3-4112-ba7b-7e22119591ca",
"metadata": {
"tags": []
@@ -331,13 +331,13 @@
"Text(0, 0.5, 'Frequency')"
]
},
- "execution_count": 12,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
- "image/png": "",
+ "image/png": "",
"text/plain": [
""
]
@@ -367,7 +367,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 11,
"id": "f2151e7b-79d4-4c3d-95f4-45d4f61006a5",
"metadata": {},
"outputs": [],
diff --git a/notebooks/data/README.md b/notebooks/0_create-test-set/data/README.md
similarity index 100%
rename from notebooks/data/README.md
rename to notebooks/0_create-test-set/data/README.md
diff --git a/notebooks/data/structures/README.md b/notebooks/0_create-test-set/data/structures/README.md
similarity index 100%
rename from notebooks/data/structures/README.md
rename to notebooks/0_create-test-set/data/structures/README.md
diff --git a/notebooks/data/structures/butanol.json b/notebooks/0_create-test-set/data/structures/butanol.json
similarity index 100%
rename from notebooks/data/structures/butanol.json
rename to notebooks/0_create-test-set/data/structures/butanol.json
diff --git a/notebooks/data/structures/caffeine.json b/notebooks/0_create-test-set/data/structures/caffeine.json
similarity index 100%
rename from notebooks/data/structures/caffeine.json
rename to notebooks/0_create-test-set/data/structures/caffeine.json
diff --git a/notebooks/data/structures/water.json b/notebooks/0_create-test-set/data/structures/water.json
similarity index 100%
rename from notebooks/data/structures/water.json
rename to notebooks/0_create-test-set/data/structures/water.json
diff --git a/notebooks/0_create-test-set/run-all-methods.sh b/notebooks/0_create-test-set/run-all-methods.sh
index 497ba86..1b2d6f5 100644
--- a/notebooks/0_create-test-set/run-all-methods.sh
+++ b/notebooks/0_create-test-set/run-all-methods.sh
@@ -1,18 +1,17 @@
#! /bin/bash
-molecule=butanol
-methods="pm7//None xtb//None hf//cc-pvtz b3lyp//cc-pvtz wb97x-d//cc-pvtz m062x//cc-pvtz ccsd(t)//cc-pvdz"
+molecule=water
+relax_method="b3lyp/cc-pvtz"
+
+hess_methods="pm7/None xtb/None hf/cc-pvtz b3lyp/cc-pvtz wb97x-d/cc-pvtz m062x/cc-pvtz ccsd(t)/cc-pvdz"
deltas="0.04 0.02 0.01 0.005 0.0025"
-#methods="ccsd(t)//cc-pvtz"
+#hess_methods="ccsd(t)/cc-pvtz"
#deltas=0.005
notebook=0_get-exact-answer.ipynb
-for name in $methods; do
- echo $name
+for method in $hess_methods; do
for delta in $deltas; do
- method=$(echo $name | cut -d "/" -f 1)
- basis=$(echo $name | cut -d "/" -f 3)
- papermill -p method $method -p basis $basis -p delta $delta -p molecule_name $molecule $notebook live.ipynb
+ papermill -p hess_method $method -p relax_method $relax_method -p delta $delta -p molecule_name $molecule $notebook live.ipynb
done
done
diff --git a/notebooks/1_explore-sampling-methods/0_random-directions-same-distance.ipynb b/notebooks/1_explore-sampling-methods/0_random-directions-same-distance.ipynb
index ea8d7b8..5bbd580 100644
--- a/notebooks/1_explore-sampling-methods/0_random-directions-same-distance.ipynb
+++ b/notebooks/1_explore-sampling-methods/0_random-directions-same-distance.ipynb
@@ -47,9 +47,8 @@
},
"outputs": [],
"source": [
- "molecule_name = 'caffeine'\n",
- "method = 'hf'\n",
- "basis = 'def2-svpd'\n",
+ "starting_geometry = '../data/exact/caffeine_pm7_None.xyz'\n",
+ "method = 'hf/def2-svpd'\n",
"threads = min(os.cpu_count(), 12)\n",
"step_size: float = 0.005 # Perturbation amount, used as maximum L2 norm"
]
@@ -69,8 +68,11 @@
"metadata": {},
"outputs": [],
"source": [
- "run_name = Path(starting_geometry).name[:-4]\n",
- "name, method, basis = run_name.split(\"_\")"
+ "relax_name = Path(starting_geometry).name[:-4]\n",
+ "name, relax_method, relax_basis = relax_name.split(\"_\")\n",
+ "method, basis = method.split(\"/\")\n",
+ "run_name = f'{name}_{method}_{basis}_at_{relax_method}_{relax_basis}'\n",
+ "print(f'Run name: {run_name}')"
]
},
{
diff --git a/notebooks/1_explore-sampling-methods/1_random-directions-variable-distance.ipynb b/notebooks/1_explore-sampling-methods/1_random-directions-variable-distance.ipynb
index 4567a8c..d46d849 100644
--- a/notebooks/1_explore-sampling-methods/1_random-directions-variable-distance.ipynb
+++ b/notebooks/1_explore-sampling-methods/1_random-directions-variable-distance.ipynb
@@ -39,7 +39,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "52252ee2-315c-48bb-8cba-07620e6e2faa",
+ "id": "c61794ce-ca24-470f-903f-4fc5118af1d3",
"metadata": {
"tags": [
"parameters"
@@ -48,13 +48,14 @@
"outputs": [],
"source": [
"starting_geometry = '../data/exact/caffeine_pm7_None.xyz'\n",
+ "method = 'pm7/None'\n",
"threads = min(os.cpu_count(), 12)\n",
- "step_size: float = 0.005 # Lambda parameter for an expontential distribution for the Perturbation amount"
+ "step_size: float = 0.005 # Maximum perturbation amount; the amount for each sample is drawn uniformly from [0, step_size)"
]
},
{
"cell_type": "markdown",
- "id": "7010df09-73b2-4d58-be03-15a5f0d04b4c",
+ "id": "7ebb8a2a-b2f8-4647-9cd4-d9a05efc4790",
"metadata": {},
"source": [
"Derived"
@@ -63,12 +64,15 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "0b6794cd-477f-45a1-b96f-2332804ddb20",
+ "id": "3177eaf6-7af7-4e7c-8440-e32c172f8669",
"metadata": {},
"outputs": [],
"source": [
- "run_name = Path(starting_geometry).name[:-4]\n",
- "name, method, basis = run_name.split(\"_\")"
+ "relax_name = Path(starting_geometry).name[:-4]\n",
+ "name, relax_method, relax_basis = relax_name.split(\"_\")\n",
+ "method, basis = method.split(\"/\")\n",
+ "run_name = f'{name}_{method}_{basis}_at_{relax_method}_{relax_basis}'\n",
+ "print(f'Run name: {run_name}')"
]
},
{
@@ -226,7 +230,8 @@
" # Sample a perturbation\n",
" disp = np.random.normal(0, 1, size=(n_atoms * 3))\n",
" disp /= np.linalg.norm(disp)\n",
- " my_step_dist = np.random.exponential(scale=step_size)\n",
+ " my_step_dist = np.random.uniform(0, step_size)\n",
+ " pbar.set_description(f'd={my_step_dist:.3e}')\n",
" disp *= my_step_dist * len(atoms)\n",
" disp = disp.reshape((-1, 3))\n",
" \n",
diff --git a/notebooks/1_explore-sampling-methods/2_displace-along-axes.ipynb b/notebooks/1_explore-sampling-methods/2_displace-along-axes.ipynb
index ef96e40..1589071 100644
--- a/notebooks/1_explore-sampling-methods/2_displace-along-axes.ipynb
+++ b/notebooks/1_explore-sampling-methods/2_displace-along-axes.ipynb
@@ -52,6 +52,7 @@
"outputs": [],
"source": [
"starting_geometry = '../data/exact/caffeine_pm7_None.xyz'\n",
+ "method = 'hf/def2-svpd'\n",
"threads = min(os.cpu_count(), 12)\n",
"step_size: float = 0.005 # Lambda parameter for an expontential distribution for the Perturbation amount\n",
"perturbs_per_evaluation: int = 2 # Number of perturbations to perform at once"
@@ -59,7 +60,7 @@
},
{
"cell_type": "markdown",
- "id": "7010df09-73b2-4d58-be03-15a5f0d04b4c",
+ "id": "134b0aa4-f7ef-415f-8334-7039bdf66152",
"metadata": {},
"source": [
"Derived"
@@ -68,12 +69,15 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "0b6794cd-477f-45a1-b96f-2332804ddb20",
+ "id": "f47df53a-1b81-4504-a9db-2fcc583d7096",
"metadata": {},
"outputs": [],
"source": [
- "run_name = Path(starting_geometry).name[:-4]\n",
- "name, method, basis = run_name.split(\"_\")"
+ "relax_name = Path(starting_geometry).name[:-4]\n",
+ "name, relax_method, relax_basis = relax_name.split(\"_\")\n",
+ "method, basis = method.split(\"/\")\n",
+ "run_name = f'{name}_{method}_{basis}_at_{relax_method}_{relax_basis}'\n",
+ "print(f'Run name: {run_name}')"
]
},
{
diff --git a/notebooks/1_explore-sampling-methods/3_displace-along-vibrational-modes.ipynb b/notebooks/1_explore-sampling-methods/3_displace-along-vibrational-modes.ipynb
index b327b4d..1739a1e 100644
--- a/notebooks/1_explore-sampling-methods/3_displace-along-vibrational-modes.ipynb
+++ b/notebooks/1_explore-sampling-methods/3_displace-along-vibrational-modes.ipynb
@@ -29,6 +29,7 @@
"from pathlib import Path\n",
"from tqdm import tqdm \n",
"import numpy as np\n",
+ "import shutil\n",
"import os"
]
},
@@ -52,6 +53,7 @@
"outputs": [],
"source": [
"starting_geometry = '../data/exact/caffeine_pm7_None.xyz'\n",
+ "method = 'pm7/None'\n",
"threads = min(os.cpu_count(), 12)\n",
"step_size: float = 0.002 # Target energy increase (units: eV)\n",
"perturbs_per_evaluation: int = 16 # Number of perturbations to perform at once\n",
@@ -70,14 +72,17 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "0b6794cd-477f-45a1-b96f-2332804ddb20",
+ "id": "91cc7cb8-a620-4395-84fc-533c041c652e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "run_name = Path(starting_geometry).name[:-4]\n",
- "name, method, basis = run_name.split(\"_\")"
+ "relax_name = Path(starting_geometry).name[:-4]\n",
+ "name, relax_method, relax_basis = relax_name.split(\"_\")\n",
+ "method, basis = method.split(\"/\")\n",
+ "run_name = f'{name}_{method}_{basis}_at_{relax_method}_{relax_basis}'\n",
+ "print(f'Run name: {run_name}')"
]
},
{
@@ -142,6 +147,8 @@
"source": [
"%%time\n",
"atoms.calc = lower_calc\n",
+ "if Path('vib').exists():\n",
+ " shutil.rmtree('vib')\n",
"vib = Vibrations(atoms)\n",
"vib.run()"
]
diff --git a/notebooks/1_explore-sampling-methods/run-all-methods.sh b/notebooks/1_explore-sampling-methods/run-all-methods.sh
index 42eea5d..8bcca29 100644
--- a/notebooks/1_explore-sampling-methods/run-all-methods.sh
+++ b/notebooks/1_explore-sampling-methods/run-all-methods.sh
@@ -1,23 +1,25 @@
#! /bin/bash
xyz=../data/exact/caffeine_pm7_None.xyz
+method='pm7/None'
+
for step_size in 0.02; do
# Do the randomized methods
- for method in 0_random-directions-same-distance.ipynb 1_random-directions-variable-distance.ipynb; do
- papermill -p starting_geometry $xyz -p step_size $step_size $method last.ipynb
+ for notebook in 0_random-directions-same-distance.ipynb 1_random-directions-variable-distance.ipynb; do
+ papermill -p starting_geometry $xyz -p method $method -p step_size $step_size $notebook last.ipynb
done
# Test with different reductions for "along axes"
notebook=2_displace-along-axes.ipynb
for n in 2 4 8; do
- papermill -p starting_geometry $xyz -p perturbs_per_evaluation $n -p step_size $step_size $notebook last.ipynb
+ papermill -p starting_geometry $xyz -p method $method -p perturbs_per_evaluation $n -p step_size $step_size $notebook last.ipynb
done
done
# Test with the vibrational modes
notebook=3_displace-along-vibrational-modes.ipynb
-for step_size in 0.001 0.002; do
+for step_size in 0.001 0.002; do # These step sizes are energy scales in eV, not distances in Angstrom as above
for n in 16 32 64; do
- papermill -p starting_geometry $xyz -p perturbs_per_evaluation $n -p step_size $step_size $notebook last.ipynb
+ papermill -p starting_geometry $xyz -p method $method -p perturbs_per_evaluation $n -p step_size $step_size $notebook last.ipynb
done
done
diff --git a/notebooks/2_testing-fitting-strategies/1_fit-forcefield-using-mbtr.ipynb b/notebooks/2_testing-fitting-strategies/1_fit-forcefield-using-mbtr.ipynb
index cb29ff0..11fd5c6 100644
--- a/notebooks/2_testing-fitting-strategies/1_fit-forcefield-using-mbtr.ipynb
+++ b/notebooks/2_testing-fitting-strategies/1_fit-forcefield-using-mbtr.ipynb
@@ -57,7 +57,7 @@
},
"outputs": [],
"source": [
- "db_path: str = '../1_explore-sampling-methods/data/along-axes/caffeine_pm7_None_d=5.00e-03-N=2.db'\n",
+ "db_path: str = '../1_explore-sampling-methods/data/along-axes/caffeine_pm7_None_at_pm7_None_d=2.00e-02-N=4.db'\n",
"overwrite: bool = False\n",
"max_size: int = 10000"
]
@@ -80,7 +80,7 @@
"outputs": [],
"source": [
"run_name, sampling_options = Path(db_path).name[:-3].rsplit(\"_\", 1)\n",
- "exact_path = Path('../data/exact/') / f'{run_name}-ase.json'\n",
+ "exact_path = Path('../0_create-test-set/data/exact/') / f'{run_name}_d=0.01-ase.json'\n",
"sampling_name = Path(db_path).parent.name\n",
"out_name = '_'.join([run_name, sampling_name, sampling_options])\n",
"out_dir = Path('data/mbtr/')"
diff --git a/notebooks/3_consolidate-results/0_compare-sampling-strategies-with-mbtr.ipynb b/notebooks/3_consolidate-results/0_compare-sampling-strategies-with-mbtr.ipynb
index c864dfc..7776ede 100644
--- a/notebooks/3_consolidate-results/0_compare-sampling-strategies-with-mbtr.ipynb
+++ b/notebooks/3_consolidate-results/0_compare-sampling-strategies-with-mbtr.ipynb
@@ -50,32 +50,38 @@
},
"outputs": [],
"source": [
- "target_mol = '../data/exact/caffeine_pm7_None.xyz'\n",
+ "target_result = '../0_create-test-set/data/exact/caffeine_pm7_None_at_pm7_None_d=0.01-ase.json'\n",
"target_method = '../2_testing-fitting-strategies/data/mbtr/'\n",
"target_size: int = 1500"
]
},
{
"cell_type": "markdown",
- "id": "8874ea91-b4f3-432a-bd28-0d33b50e24ee",
+ "id": "5af51ec8-7a05-4000-8194-998dd08ce315",
"metadata": {},
"source": [
- "## Load the Exact Result\n",
- "The target molecule filename determines which molecule we'll look for. The name includes both the molecule name and method used to evaluate the hessian"
+ "Derived"
]
},
{
"cell_type": "code",
"execution_count": 3,
- "id": "db22a33d-e70a-4e7b-aad7-39aa4e552804",
+ "id": "7094c318-9f4c-448c-b1cb-bf61b3678851",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "target_mol = Path(target_mol)\n",
- "mol_name = target_mol.name[:-4]\n",
- "atoms = read(target_mol)"
+ "mol_name, _ = Path(target_result).name.rsplit(\"_\", 1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8874ea91-b4f3-432a-bd28-0d33b50e24ee",
+ "metadata": {},
+ "source": [
+ "## Load the Exact Result\n",
+ "The target result filename determines which molecule we'll look for. The name includes the molecule name, the method used to evaluate the Hessian, the method used to relax the geometry, and the finite-difference step size"
]
},
{
@@ -89,7 +95,7 @@
{
"data": {
"text/plain": [
- ""
+ ""
]
},
"execution_count": 4,
@@ -98,7 +104,7 @@
}
],
"source": [
- "exact_hess = VibrationsData.read(target_mol.parent / f'{mol_name}-ase.json')\n",
+ "exact_hess = VibrationsData.read(target_result)\n",
"exact_hess"
]
},
@@ -118,9 +124,18 @@
"metadata": {
"tags": []
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Found 11 approximate Hessians\n"
+ ]
+ }
+ ],
"source": [
- "all_hessians = list(Path(target_method).glob(f\"{mol_name}_*-increment.json\"))"
+ "all_hessians = list(Path(target_method).glob(f\"{mol_name}_*-increment.json\"))\n",
+ "print(f'Found {len(all_hessians)} approximate Hessians')"
]
},
{
@@ -135,16 +150,16 @@
"name": "stderr",
"output_type": "stream",
"text": [
- " 0%| | 0/21 [00:00, ?it/s]/home/lward/miniconda3/envs/jitterbug/lib/python3.9/site-packages/pmutt/statmech/vib.py:87: RuntimeWarning: overflow encountered in sinh\n",
+ " 0%| | 0/11 [00:00, ?it/s]/home/lward/miniconda3/envs/jitterbug/lib/python3.9/site-packages/pmutt/statmech/vib.py:87: RuntimeWarning: overflow encountered in sinh\n",
" (0.5 * vib_dimless)**2 * (1. / np.sinh(vib_dimless / 2.))**2\n",
- "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:07<00:00, 2.64it/s]"
+ "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:02<00:00, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Loaded 336 approximate hessians\n"
+ "Loaded 176 approximate hessians\n"
]
},
{
@@ -194,6 +209,48 @@
"print(f'Loaded {len(all_results)} approximate hessians')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "c3aa94ce-1f23-4007-8766-3da998db628e",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "path ../2_testing-fitting-strategies/data/mbtr/caff...\n",
+ "sampling_method along-axes\n",
+ "options d=2.00e-02-N=2\n",
+ "size 5\n",
+ "d 2.00e-02\n",
+ "N 2\n",
+ "scale_factor 1.0\n",
+ "zpe 5.058148\n",
+ "zpe_error -104.407702\n",
+ "cp [0.014146793803042134, 0.01771395512622287, 0....\n",
+ "cp_error [-0.010065824804868617, -0.01212553988407947, ...\n",
+ "h [5.072098696119492, 5.11863818857305, 5.175908...\n",
+ "h_error [104.39707523324437, 104.36530161384731, 104.3...\n",
+ "temps [1.0, 3.9291338582677167, 6.858267716535433, 9...\n",
+ "vib_freqs [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...\n",
+ "vib_errors [218.79974021444124, 187.1848443090143, 148.53...\n",
+ "vib_mae 1119.440878\n",
+ "maxstep NaN\n",
+ "lower NaN\n",
+ "Name: 0, dtype: object"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_results.iloc[0]"
+ ]
+ },
{
"cell_type": "markdown",
"id": "0f243a23-ed89-4f5e-aeac-f23722ef10af",
@@ -204,7 +261,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 8,
"id": "2d315d44-7564-468f-bd03-8bbedf8b424c",
"metadata": {
"tags": []
@@ -215,246 +272,6 @@
" all_results[col] = pd.to_numeric(all_results[col])"
]
},
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "eee03c3a-4a53-4e84-995a-c2191e6f6332",
- "metadata": {
- "tags": []
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " path | \n",
- " sampling_method | \n",
- " options | \n",
- " size | \n",
- " d | \n",
- " N | \n",
- " zpe | \n",
- " zpe_error | \n",
- " cp | \n",
- " cp_error | \n",
- " h | \n",
- " h_error | \n",
- " temps | \n",
- " vib_freqs | \n",
- " vib_errors | \n",
- " vib_mae | \n",
- " maxstep | \n",
- " lower | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " ../2_testing-fitting-strategies/data/mbtr/caff... | \n",
- " along-axes | \n",
- " d=1.00e-02-N=8 | \n",
- " 5 | \n",
- " 0.01 | \n",
- " 8.0 | \n",
- " 14.200987 | \n",
- " -95.264864 | \n",
- " [0.000397593092606875, 0.002386964975551445, 0... | \n",
- " [0.0036833759055666425, 0.0032014502665919545,... | \n",
- " [14.201069696784895, 14.20544123215772, 14.216... | \n",
- " [95.26810423257896, 95.27849857026263, 95.2846... | \n",
- " [1.0, 3.9291338582677167, 6.858267716535433, 9... | \n",
- " [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n",
- " [365.26521782818406, 345.1085529599917, 194.17... | \n",
- " 1027.154434 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " ../2_testing-fitting-strategies/data/mbtr/caff... | \n",
- " along-axes | \n",
- " d=1.00e-02-N=8 | \n",
- " 91 | \n",
- " 0.01 | \n",
- " 8.0 | \n",
- " 5.163324 | \n",
- " -104.302527 | \n",
- " [0.0019348995359107907, 0.008929018823987386, ... | \n",
- " [0.002146069462262727, -0.0033406035818439863,... | \n",
- " [5.163950489872683, 5.179473698114745, 5.21578... | \n",
- " [104.30522343949119, 104.3044661043056, 104.28... | \n",
- " [1.0, 3.9291338582677167, 6.858267716535433, 9... | \n",
- " [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n",
- " [136.3752974454976, 116.1372867519512, 97.3719... | \n",
- " 1113.227006 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " ../2_testing-fitting-strategies/data/mbtr/caff... | \n",
- " along-axes | \n",
- " d=1.00e-02-N=8 | \n",
- " 177 | \n",
- " 0.01 | \n",
- " 8.0 | \n",
- " 6.876866 | \n",
- " -102.588985 | \n",
- " [0.0020382429986622493, 0.008341448274375083, ... | \n",
- " [0.0020427259995112685, -0.0027530330322316837... | \n",
- " [6.878002515584569, 6.8926516980488675, 6.9274... | \n",
- " [102.59117141377929, 102.59128810437149, 102.5... | \n",
- " [1.0, 3.9291338582677167, 6.858267716535433, 9... | \n",
- " [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n",
- " [96.75265542338498, 90.41924803705203, 74.0875... | \n",
- " 1092.500033 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " ../2_testing-fitting-strategies/data/mbtr/caff... | \n",
- " along-axes | \n",
- " d=1.00e-02-N=8 | \n",
- " 263 | \n",
- " 0.01 | \n",
- " 8.0 | \n",
- " 89.958157 | \n",
- " -19.507693 | \n",
- " [0.0014020270591578593, 0.0023843729911753324,... | \n",
- " [0.0026789419390156584, 0.003204042250968067, ... | \n",
- " [89.9587466410539, 89.96445676642922, 89.97267... | \n",
- " [19.510427288309955, 19.51948303599113, 19.528... | \n",
- " [1.0, 3.9291338582677167, 6.858267716535433, 9... | \n",
- " [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.446... | \n",
- " [111.37319602907411, 108.7601947486991, 66.937... | \n",
- " 263.944079 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " ../2_testing-fitting-strategies/data/mbtr/caff... | \n",
- " along-axes | \n",
- " d=1.00e-02-N=8 | \n",
- " 350 | \n",
- " 0.01 | \n",
- " 8.0 | \n",
- " 93.839129 | \n",
- " -15.626722 | \n",
- " [1.6382310852832182e-08, 0.0004830509673205082... | \n",
- " [0.004080952615862665, 0.005105364274822891, 0... | \n",
- " [93.83912881523764, 93.83954779804499, 93.8423... | \n",
- " [15.630045114126219, 15.644392004375362, 15.65... | \n",
- " [1.0, 3.9291338582677167, 6.858267716535433, 9... | \n",
- " [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n",
- " [118.00864162563397, 110.49254554846664, 100.8... | \n",
- " 237.597413 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " path sampling_method \\\n",
- "0 ../2_testing-fitting-strategies/data/mbtr/caff... along-axes \n",
- "1 ../2_testing-fitting-strategies/data/mbtr/caff... along-axes \n",
- "2 ../2_testing-fitting-strategies/data/mbtr/caff... along-axes \n",
- "3 ../2_testing-fitting-strategies/data/mbtr/caff... along-axes \n",
- "4 ../2_testing-fitting-strategies/data/mbtr/caff... along-axes \n",
- "\n",
- " options size d N zpe zpe_error \\\n",
- "0 d=1.00e-02-N=8 5 0.01 8.0 14.200987 -95.264864 \n",
- "1 d=1.00e-02-N=8 91 0.01 8.0 5.163324 -104.302527 \n",
- "2 d=1.00e-02-N=8 177 0.01 8.0 6.876866 -102.588985 \n",
- "3 d=1.00e-02-N=8 263 0.01 8.0 89.958157 -19.507693 \n",
- "4 d=1.00e-02-N=8 350 0.01 8.0 93.839129 -15.626722 \n",
- "\n",
- " cp \\\n",
- "0 [0.000397593092606875, 0.002386964975551445, 0... \n",
- "1 [0.0019348995359107907, 0.008929018823987386, ... \n",
- "2 [0.0020382429986622493, 0.008341448274375083, ... \n",
- "3 [0.0014020270591578593, 0.0023843729911753324,... \n",
- "4 [1.6382310852832182e-08, 0.0004830509673205082... \n",
- "\n",
- " cp_error \\\n",
- "0 [0.0036833759055666425, 0.0032014502665919545,... \n",
- "1 [0.002146069462262727, -0.0033406035818439863,... \n",
- "2 [0.0020427259995112685, -0.0027530330322316837... \n",
- "3 [0.0026789419390156584, 0.003204042250968067, ... \n",
- "4 [0.004080952615862665, 0.005105364274822891, 0... \n",
- "\n",
- " h \\\n",
- "0 [14.201069696784895, 14.20544123215772, 14.216... \n",
- "1 [5.163950489872683, 5.179473698114745, 5.21578... \n",
- "2 [6.878002515584569, 6.8926516980488675, 6.9274... \n",
- "3 [89.9587466410539, 89.96445676642922, 89.97267... \n",
- "4 [93.83912881523764, 93.83954779804499, 93.8423... \n",
- "\n",
- " h_error \\\n",
- "0 [95.26810423257896, 95.27849857026263, 95.2846... \n",
- "1 [104.30522343949119, 104.3044661043056, 104.28... \n",
- "2 [102.59117141377929, 102.59128810437149, 102.5... \n",
- "3 [19.510427288309955, 19.51948303599113, 19.528... \n",
- "4 [15.630045114126219, 15.644392004375362, 15.65... \n",
- "\n",
- " temps \\\n",
- "0 [1.0, 3.9291338582677167, 6.858267716535433, 9... \n",
- "1 [1.0, 3.9291338582677167, 6.858267716535433, 9... \n",
- "2 [1.0, 3.9291338582677167, 6.858267716535433, 9... \n",
- "3 [1.0, 3.9291338582677167, 6.858267716535433, 9... \n",
- "4 [1.0, 3.9291338582677167, 6.858267716535433, 9... \n",
- "\n",
- " vib_freqs \\\n",
- "0 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
- "1 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
- "2 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
- "3 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.446... \n",
- "4 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
- "\n",
- " vib_errors vib_mae maxstep \\\n",
- "0 [365.26521782818406, 345.1085529599917, 194.17... 1027.154434 NaN \n",
- "1 [136.3752974454976, 116.1372867519512, 97.3719... 1113.227006 NaN \n",
- "2 [96.75265542338498, 90.41924803705203, 74.0875... 1092.500033 NaN \n",
- "3 [111.37319602907411, 108.7601947486991, 66.937... 263.944079 NaN \n",
- "4 [118.00864162563397, 110.49254554846664, 100.8... 237.597413 NaN \n",
- "\n",
- " lower \n",
- "0 NaN \n",
- "1 NaN \n",
- "2 NaN \n",
- "3 NaN \n",
- "4 NaN "
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "all_results.head()"
- ]
- },
{
"cell_type": "code",
"execution_count": 9,
@@ -466,10 +283,10 @@
{
"data": {
"text/plain": [
- "along-axes 9\n",
"along-vibrational-modes 6\n",
- "random-dir-same-dist 3\n",
- "random-dir-variable-dist 3\n",
+ "along-axes 3\n",
+ "random-dir-same-dist 1\n",
+ "random-dir-variable-dist 1\n",
"Name: sampling_method, dtype: int64"
]
},
@@ -538,10 +355,10 @@
{
"data": {
"text/plain": [
- "{PosixPath('../2_testing-fitting-strategies/data/mbtr/caffeine_pm7_None_along-axes_d=2.00e-02-N=4-increment.json'),\n",
- " PosixPath('../2_testing-fitting-strategies/data/mbtr/caffeine_pm7_None_along-vibrational-modes_d=1.00e-03-N=64-maxstep=6.00e-02-lower=xtb+None-increment.json'),\n",
- " PosixPath('../2_testing-fitting-strategies/data/mbtr/caffeine_pm7_None_random-dir-same-dist_d=1.00e-02-increment.json'),\n",
- " PosixPath('../2_testing-fitting-strategies/data/mbtr/caffeine_pm7_None_random-dir-variable-dist_d=1.00e-02-increment.json')}"
+ "{PosixPath('../2_testing-fitting-strategies/data/mbtr/caffeine_pm7_None_at_pm7_None_along-axes_d=2.00e-02-N=4-increment.json'),\n",
+ " PosixPath('../2_testing-fitting-strategies/data/mbtr/caffeine_pm7_None_at_pm7_None_along-vibrational-modes_d=1.00e-03-N=32-maxstep=6.00e-02-lower=xtb+None-increment.json'),\n",
+ " PosixPath('../2_testing-fitting-strategies/data/mbtr/caffeine_pm7_None_at_pm7_None_random-dir-same-dist_d=2.00e-02-increment.json'),\n",
+ " PosixPath('../2_testing-fitting-strategies/data/mbtr/caffeine_pm7_None_at_pm7_None_random-dir-variable-dist_d=2.00e-02-increment.json')}"
]
},
"execution_count": 12,
@@ -583,7 +400,7 @@
"outputs": [
{
"data": {
- "image/png": "",
+ "image/png": "",
"text/plain": [
"