From 499685d2548e6c4c852cbf6c9fc40e8e5aac955c Mon Sep 17 00:00:00 2001 From: Logan Ward Date: Wed, 6 Dec 2023 15:05:25 -0500 Subject: [PATCH] Add a dynamic workflow (#30) * Add the workflow function * Add missing fixture * Add a workflow for exact computations * Compute the Hessian with numerical diff * Use that the matrix is symmetric * Add a CLI --- jitterbug/cli.py | 90 ++++++++++++++++ jitterbug/parsl.py | 23 ++++ jitterbug/thinkers/__init__.py | 0 jitterbug/thinkers/exact.py | 188 +++++++++++++++++++++++++++++++++ jitterbug/utils.py | 2 +- pyproject.toml | 3 + tests/conftest.py | 10 ++ tests/files/water.xyz | 5 + tests/test_cli.py | 14 +++ tests/test_parsl.py | 8 ++ tests/test_thinkers.py | 91 ++++++++++++++++ 11 files changed, 433 insertions(+), 1 deletion(-) create mode 100644 jitterbug/cli.py create mode 100644 jitterbug/parsl.py create mode 100644 jitterbug/thinkers/__init__.py create mode 100644 jitterbug/thinkers/exact.py create mode 100644 tests/conftest.py create mode 100644 tests/files/water.xyz create mode 100644 tests/test_cli.py create mode 100644 tests/test_parsl.py create mode 100644 tests/test_thinkers.py diff --git a/jitterbug/cli.py b/jitterbug/cli.py new file mode 100644 index 0000000..7389e38 --- /dev/null +++ b/jitterbug/cli.py @@ -0,0 +1,90 @@ +"""Command line interface to running Jitterbug on a new molecule""" +from argparse import ArgumentParser +from functools import partial, update_wrapper +from pathlib import Path +from typing import Optional +import logging +import sys + +import numpy as np +from ase.io import read +from colmena.queue import PipeQueues +from colmena.task_server import ParslTaskServer +from parsl import Config, HighThroughputExecutor + +from jitterbug.parsl import get_energy +from jitterbug.thinkers.exact import ExactHessianThinker + +logger = logging.getLogger(__name__) + + +def main(args: Optional[list[str]] = None): + """Run Jitterbug""" + + parser = ArgumentParser() + parser.add_argument('xyz', help='Path to the XYZ file') + parser.add_argument('--method', nargs=2, required=True, + help='Method to use to compute energies. Format: [method] [basis]. Example: B3LYP 6-31g*') + parser.add_argument('--exact', help='Compute Hessian using numerical derivatives', action='store_true') + args = parser.parse_args(args) + + # Load the structure + xyz_path = Path(args.xyz) + atoms = read(args.xyz) + xyz_name = xyz_path.with_suffix('').name + + # Make the run directory + method, basis = (x.lower() for x in args.method) + run_dir = Path('run') / xyz_name / f'{method}_{basis}' + run_dir.mkdir(parents=True, exist_ok=True) + + # Start logging + handlers = [logging.StreamHandler(sys.stdout), logging.FileHandler(run_dir / 'run.log', mode='a')] + for handler in handlers: + handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) + + for logger_name in ['jitterbug']: + my_logger = logging.getLogger(logger_name) + for handler in handlers: + my_logger.addHandler(handler) + my_logger.setLevel(logging.INFO) + + # Write the XYZ file to the run directory + if (run_dir / xyz_path.name).exists() and (run_dir / xyz_path.name).read_text() != xyz_path.read_text(): + raise ValueError('Run exists for a different structure with the same name.') + (run_dir / xyz_path.name).write_text(xyz_path.read_text()) + logger.info(f'Started run for {xyz_name} at {method}/{basis}. Run directory: {run_dir.absolute()}') + + # Make the function to compute energy + energy_fun = partial(get_energy, method=method, basis=basis) + update_wrapper(energy_fun, get_energy) + + # Create a thinker + queues = PipeQueues(topics=['simulation']) + if args.exact: + thinker = ExactHessianThinker( + queues=queues, + atoms=atoms, + run_dir=run_dir, + num_workers=1, + ) + functions = [] # No other functions to run + else: + raise NotImplementedError() + + # Create the task server + config = Config(run_dir=str(run_dir / 'parsl-logs'), executors=[HighThroughputExecutor(max_workers=1)]) + task_server = ParslTaskServer([energy_fun] + functions, queues, config) + + # Run everything + try: + task_server.start() + thinker.run() + finally: + queues.send_kill_signal() + + # Get the Hessian + hessian = thinker.compute_hessian() + hess_path = run_dir / 'hessian.npy' + np.save(hess_path, hessian) + logger.info(f'Wrote Hessian to {hess_path}') diff --git a/jitterbug/parsl.py b/jitterbug/parsl.py new file mode 100644 index 0000000..117a951 --- /dev/null +++ b/jitterbug/parsl.py @@ -0,0 +1,23 @@ +"""Wrappers for functions compatible with the Parsl workflow engine""" +from typing import Optional + +import ase + +from jitterbug.utils import make_calculator + + +def get_energy(atoms: ase.Atoms, method: str, basis: Optional[str], **kwargs) -> float: + """Compute the energy of an atomic structure + + Keyword arguments are passed to :meth:`make_calculator`. + + Args: + atoms: Structure to evaluate + method: Name of the method to use (e.g., B3LYP) + basis: Basis set to use (e.g., cc-PVTZ) + Returns: + Energy (units: eV) + """ + + calc = make_calculator(method, basis, **kwargs) + return calc.get_potential_energy(atoms) diff --git a/jitterbug/thinkers/__init__.py b/jitterbug/thinkers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/jitterbug/thinkers/exact.py b/jitterbug/thinkers/exact.py new file mode 100644 index 0000000..bf2ad15 --- /dev/null +++ b/jitterbug/thinkers/exact.py @@ -0,0 +1,188 @@ +"""Run an exact Hessian computation""" +from csv import reader, writer +from pathlib import Path +from typing import Optional + +import ase +import numpy as np +from colmena.models import Result +from colmena.queue import ColmenaQueues +from colmena.thinker import BaseThinker, ResourceCounter, agent, result_processor + + +class ExactHessianThinker(BaseThinker): + """Schedule the calculation of a complete set of numerical derivatives""" + + def __init__(self, queues: ColmenaQueues, num_workers: int, atoms: ase.Atoms, run_dir: Path, step_size: float = 0.005): + super().__init__(queues, ResourceCounter(num_workers)) + self.atoms = atoms + + # Initialize storage for the energies + self.result_file = run_dir / 'simulation-results.json' + self.step_size = step_size + self.unperturbed_energy: Optional[float] = None + self.single_perturb = np.zeros((len(atoms), 3, 2)) * np.nan # Perturbation of a single direction. [atom_id, axis (xyz), dir_id (0 back, 1 forward)] + self.double_perturb = np.zeros((len(atoms), 3, 2, len(atoms), 3, 2)) * np.nan + # Perturbation of two directions [atom1_id, axis1, dir1_id, atom2_id, axis2 dir2_id] + + # Load what has been run already + self.run_dir = run_dir + self.run_dir.mkdir(exist_ok=True) + self.energy_path = self.run_dir / 'unperturbed.energy' + self.single_path = self.run_dir / 'single_energies.csv' + self.double_path = self.run_dir / 'double_energies.csv' + + if self.energy_path.exists(): + self.unperturbed_energy = float(self.energy_path.read_text()) + + if self.single_path.exists(): + with self.single_path.open() as fp: + count = 0 + for row in reader(fp): + index = tuple(map(int, row[:-1])) + count += 1 + self.single_perturb[index] = row[-1] + self.logger.info(f'Read {count} single perturbations out of {self.single_perturb.size}') + + if self.double_path.exists(): + with self.double_path.open() as fp: + count = 0 + for row in reader(fp): + count += 1 + # Get the index and its symmetric counterpart + index = tuple(map(int, row[:-1])) + sym_index = list(index) + sym_index[:3], sym_index[3:] = index[3:], index[:3] + self.double_perturb[index] = self.double_perturb[tuple(sym_index)] = row[-1] + self.logger.info(f'Read {count} double perturbations') + + @agent() + def submit_tasks(self): + """Submit all required tasks then start the shutdown process by exiting""" + + # Start with the unperturbed energy + if self.unperturbed_energy is None: + self.queues.send_inputs( + self.atoms, + method='get_energy', + task_info={'type': 'unperturbed'} + ) + + # Submit the single perturbations + with np.nditer(self.single_perturb, flags=['multi_index']) as it: + count = 0 + for x in it: + # Skip if done + if np.isfinite(x): + continue + + # Submit if not done + self.rec.acquire(None, 1) + count += 1 + atom_id, axis_id, dir_id = it.multi_index + + new_atoms = self.atoms.copy() + new_atoms.positions[atom_id, axis_id] += self.step_size - 2 * self.step_size * dir_id + self.queues.send_inputs( + new_atoms, + method='get_energy', + task_info={'type': 'single', 'coord': it.multi_index} + ) + self.logger.info(f'Finished submitting {count} single perturbations') + + # Submit the double perturbations + with np.nditer(self.double_perturb, flags=['multi_index']) as it: + count = 0 + for x in it: + # Skip if done + if np.isfinite(x): + continue + + # Skip if perturbing the same direction twice, or if from the lower triangle + if it.multi_index[:2] == it.multi_index[3:5] or it.multi_index[:3] < it.multi_index[3:]: + continue + + # Submit if not done + self.rec.acquire(None, 1) + count += 1 + + # Perturb two axes + new_atoms = self.atoms.copy() + for atom_id, axis_id, dir_id in [it.multi_index[:3], it.multi_index[3:]]: + new_atoms.positions[atom_id, axis_id] += self.step_size - 2 * self.step_size * dir_id + + self.queues.send_inputs( + new_atoms, + method='get_energy', + task_info={'type': 'double', 'coord': it.multi_index} + ) + self.logger.info(f'Finished submitting {count} double perturbations') + + @result_processor + def store_energy(self, result: Result): + """Store the energy in the appropriate files""" + self.rec.release() + + # Store the result object to disk + with self.result_file.open('a') as fp: + print(result.json(exclude={'inputs'}), file=fp) + + if not result.success: + self.logger.warning(f'Calculation failed due to {result.failure_info.exception}') + return + + calc_type = result.task_info['type'] + # Store unperturbed energy + if calc_type == 'unperturbed': + self.logger.info('Storing energy of unperturbed structure') + self.unperturbed_energy = result.value + self.energy_path.write_text(str(result.value)) + return + + # Store perturbed energy + coord = result.task_info['coord'] + self.logger.info(f'Saving a {calc_type} perturbation: ({",".join(map(str, coord))})') + if calc_type == 'single': + energy_file = self.single_path + energies = self.single_perturb + else: + energy_file = self.double_path + energies = self.double_perturb + + with energy_file.open('a') as fp: + csv_writer = writer(fp) + csv_writer.writerow(coord + [result.value]) + + energies[tuple(coord)] = result.value + if calc_type == 'double': + sym_coord = list(coord) + sym_coord[:3], sym_coord[3:] = coord[3:], coord[:3] + energies[tuple(sym_coord)] = result.value + + def compute_hessian(self) -> np.ndarray: + """Compute the Hessian using finite differences + + Returns: + Hessian in the 2D form + Raises: + (ValueError) If there is missing data + """ + + # Check that all data are available + n_atoms = len(self.atoms) + if not np.isfinite(self.single_perturb).all(): + raise ValueError(f'Missing {np.isnan(self.single_perturb).sum()} single perturbations') + expected_double = self.double_perturb.size - (4 * n_atoms ** 2) + if not np.isfinite(self.double_perturb).sum() == expected_double: + raise ValueError(f'Missing {expected_double - np.isfinite(self.double_perturb).sum()} double perturbations') + + # Flatten the arrays + single_flat = np.reshape(self.single_perturb, (n_atoms * 3, 2)) + double_flat = np.reshape(self.double_perturb, (n_atoms * 3, 2, n_atoms * 3, 2)) + + # Compute the finite differences + # https://en.wikipedia.org/wiki/Finite_difference#Multivariate_finite_differences + diagonal = (single_flat.sum(axis=1) - self.unperturbed_energy * 2) / (self.step_size ** 2) + off_diagonal = (double_flat[:, 0, :, 0] + double_flat[:, 1, :, 1] - double_flat[:, 0, :, 1] - double_flat[:, 1, :, 0]) / (4 * self.step_size ** 2) + np.fill_diagonal(off_diagonal, 0) + return np.diag(diagonal) + off_diagonal diff --git a/jitterbug/utils.py b/jitterbug/utils.py index 77673a5..568220a 100644 --- a/jitterbug/utils.py +++ b/jitterbug/utils.py @@ -30,7 +30,7 @@ def make_calculator(method: str, basis: Optional[str], **kwargs) -> Calculator: """ if method in mopac_methods: - if not (basis is None or basis == "None"): + if not (basis is None or basis.lower() == "none"): raise ValueError(f'Basis must be none for method: {method}') return MOPAC(method=method, command='mopac PREFIX.mop > /dev/null') elif method == 'xtb': diff --git a/pyproject.toml b/pyproject.toml index 738120b..8deaa27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,3 +45,6 @@ test = [ 'pytest-timeout', 'pytest-cov', ] + +[project.scripts] +jitterbug = "jitterbug.cli:main" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..5329698 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,10 @@ +from pathlib import Path + +from pytest import fixture + +_file_dir = Path(__file__).parent / 'files' + + +@fixture() +def xyz_path(): + return _file_dir / 'water.xyz' diff --git a/tests/files/water.xyz b/tests/files/water.xyz new file mode 100644 index 0000000..1de00a3 --- /dev/null +++ b/tests/files/water.xyz @@ -0,0 +1,5 @@ +3 +Properties=species:S:1:pos:R:3:forces:R:3 energy=-2080.5785031368773 pbc="F F F" +O 2.53690888 -0.34072954 0.00000000 0.00346641 0.00474413 0.00000000 +H 3.29702983 0.24793463 0.00000000 0.00004343 -0.00355994 -0.00000000 +H 1.77686228 0.24789366 -0.00000000 -0.00350905 -0.00087620 0.00000000 diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..927e5c4 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,14 @@ +from contextlib import redirect_stdout +from os import devnull +from pathlib import Path + +from jitterbug.cli import main + + +def test_exact_solver(xyz_path): + with open(devnull, 'w') as fo: + with redirect_stdout(fo): + main([ + str(xyz_path), '--exact', '--method', 'pm7', 'None' + ]) + assert (Path('run') / 'water' / 'pm7_none' / 'hessian.npy').exists() diff --git a/tests/test_parsl.py b/tests/test_parsl.py new file mode 100644 index 0000000..364dd00 --- /dev/null +++ b/tests/test_parsl.py @@ -0,0 +1,8 @@ +from ase.io import read + +from jitterbug.parsl import get_energy + + +def test_energy(xyz_path): + atoms = read(xyz_path) + get_energy(atoms, 'pm7', None) diff --git a/tests/test_thinkers.py b/tests/test_thinkers.py new file mode 100644 index 0000000..740bcc8 --- /dev/null +++ b/tests/test_thinkers.py @@ -0,0 +1,91 @@ +from functools import partial, update_wrapper +from pathlib import Path + +import numpy as np +from ase.io import read +from ase.vibrations import Vibrations +from colmena.queue.python import PipeQueues +from colmena.task_server.parsl import ParslTaskServer +from parsl import Config, HighThroughputExecutor +from pytest import fixture + +from jitterbug.compare import compare_hessians +from jitterbug.parsl import get_energy +from jitterbug.thinkers.exact import ExactHessianThinker +from jitterbug.utils import make_calculator + + +@fixture() +def queues(): + return PipeQueues(topics='simulation') + + +@fixture() +def ase_hessian(xyz_path, tmp_path) -> np.ndarray: + atoms = read(xyz_path) + atoms.calc = make_calculator('pm7', None) + vib = Vibrations(atoms, delta=0.005, name=str(Path(tmp_path) / 'vib')) + vib.run() + return vib.get_vibrations().get_hessian_2d() + + +@fixture(autouse=True) +def task_server(queues): + # Make the function + energy_func = partial(get_energy, method='pm7', basis=None) + update_wrapper(energy_func, get_energy) + + # Make the task server + config = Config(executors=[HighThroughputExecutor(max_workers=1)]) + server = ParslTaskServer([energy_func], queues, config) + + # Run and then kill when tests are complete + server.start() + yield server + queues.send_kill_signal() + server.join() + + +def test_exact(xyz_path, queues, tmpdir, ase_hessian): + # Make the thinker + atoms = read(xyz_path) + run_path = Path(tmpdir) / 'run' + thinker = ExactHessianThinker( + queues=queues, + num_workers=1, + atoms=atoms, + run_dir=run_path, + ) + assert run_path.exists() + assert np.isnan(thinker.double_perturb).all() + assert thinker.double_perturb.size == (3 ** 4 * 4) + + # Run it + thinker.run() + assert np.isfinite(thinker.single_perturb).all() + + # Make sure it picks up from a previous run + thinker = ExactHessianThinker( + queues=queues, + num_workers=1, + atoms=atoms, + run_dir=run_path, + ) + assert np.isfinite(thinker.single_perturb).all() + assert np.isfinite(thinker.double_perturb).any() + assert thinker.unperturbed_energy is not None + + # Make sure it doesn't do any new calculations + start_size = (run_path / 'simulation-results.json').stat().st_size + thinker.run() + end_size = (run_path / 'simulation-results.json').stat().st_size + assert start_size == end_size + + # Compute the Hessian + hessian = thinker.compute_hessian() + assert np.isfinite(hessian).all() + assert hessian.shape == (9, 9) + + # Make sure it is close to ase's + comparison = compare_hessians(atoms, hessian, ase_hessian) + assert abs(comparison.zpe_error) < 0.05