Skip to content

Commit

Permalink
Add ability to deploy on HPC (#31)
Browse files Browse the repository at this point in the history
* Send whole atoms record back

* Run ASE inside a temporary directory

Avoid calculations from interfering with each other

* Test with >1 worker

* Add option for loading parsl configuration

* Name the run directory based on hessian method

* Test the parsl config loader
  • Loading branch information
WardLT authored Dec 7, 2023
1 parent 499685d commit fa9a179
Show file tree
Hide file tree
Showing 9 changed files with 150 additions and 21 deletions.
21 changes: 17 additions & 4 deletions jitterbug/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from colmena.task_server import ParslTaskServer
from parsl import Config, HighThroughputExecutor

from jitterbug.parsl import get_energy
from jitterbug.parsl import get_energy, load_configuration
from jitterbug.thinkers.exact import ExactHessianThinker

logger = logging.getLogger(__name__)
Expand All @@ -26,6 +26,7 @@ def main(args: Optional[list[str]] = None):
parser.add_argument('--method', nargs=2, required=True,
help='Method to use to compute energies. Format: [method] [basis]. Example: B3LYP 6-31g*')
parser.add_argument('--exact', help='Compute Hessian using numerical derivatives', action='store_true')
parser.add_argument('--parsl-config', help='Path to the Parsl configuration to use')
args = parser.parse_args(args)

# Load the structure
Expand All @@ -35,7 +36,11 @@ def main(args: Optional[list[str]] = None):

# Make the run directory
method, basis = (x.lower() for x in args.method)
run_dir = Path('run') / xyz_name / f'{method}_{basis}'
if args.exact:
compute_name = 'exact'
else:
raise NotImplementedError()
run_dir = Path('run') / xyz_name / f'{method}_{basis}_{compute_name}'
run_dir.mkdir(parents=True, exist_ok=True)

# Start logging
Expand All @@ -55,6 +60,15 @@ def main(args: Optional[list[str]] = None):
(run_dir / xyz_path.name).write_text(xyz_path.read_text())
logger.info(f'Started run for {xyz_name} at {method}/{basis}. Run directory: {run_dir.absolute()}')

# Load Parsl configuration
if args.parsl_config is None:
config = Config(run_dir=str(run_dir / 'parsl-logs'), executors=[HighThroughputExecutor(max_workers=1)])
num_workers = 1
logger.info('Running computations locally, one-at-a-time')
else:
config, num_workers, ase_options = load_configuration(args.parsl_config)
logger.info(f'Running on {num_workers} workers as defined by {args.parsl_config}')

# Make the function to compute energy
energy_fun = partial(get_energy, method=method, basis=basis)
update_wrapper(energy_fun, get_energy)
Expand All @@ -66,14 +80,13 @@ def main(args: Optional[list[str]] = None):
queues=queues,
atoms=atoms,
run_dir=run_dir,
num_workers=1,
num_workers=num_workers,
)
functions = [] # No other functions to run
else:
raise NotImplementedError()

# Create the task server
config = Config(run_dir=str(run_dir / 'parsl-logs'), executors=[HighThroughputExecutor(max_workers=1)])
task_server = ParslTaskServer([energy_fun] + functions, queues, config)

# Run everything
Expand Down
51 changes: 46 additions & 5 deletions jitterbug/parsl.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
"""Wrappers for functions compatible with the Parsl workflow engine"""
import os
from tempfile import TemporaryDirectory
from typing import Optional
from pathlib import Path

from parsl import Config
import ase

from jitterbug.utils import make_calculator
from jitterbug.utils import make_calculator, write_to_string


def get_energy(atoms: ase.Atoms, method: str, basis: Optional[str], **kwargs) -> float:
def get_energy(atoms: ase.Atoms, method: str, basis: Optional[str], scratch_dir: Optional[str] = None, **kwargs) -> str:
"""Compute the energy of an atomic structure
Keyword arguments are passed to :meth:`make_calculator`.
Expand All @@ -15,9 +19,46 @@ def get_energy(atoms: ase.Atoms, method: str, basis: Optional[str], **kwargs) ->
atoms: Structure to evaluate
method: Name of the method to use (e.g., B3LYP)
basis: Basis set to use (e.g., cc-PVTZ)
scratch_dir: Path to the scratch directory.
Returns:
Energy (units: eV)
Atoms record serialized with the energy and any other data produced by the calculator
"""

calc = make_calculator(method, basis, **kwargs)
return calc.get_potential_energy(atoms)
# Make a temporary directory
start_dir = Path.cwd()
tmp = TemporaryDirectory(dir=scratch_dir, prefix='jitterbug_')
try:
os.chdir(tmp.name)
calc = make_calculator(method, basis, directory=tmp.name, **kwargs)
atoms.calc = calc
atoms.get_potential_energy()
return write_to_string(atoms, 'json')
finally:
os.chdir(start_dir)
tmp.cleanup()


def load_configuration(path: os.PathLike, function_name: str = 'make_config') -> tuple[Config, int, dict]:
    """Load a configuration from a file

    Executes the Python file at ``path`` and calls the function named ``function_name``
    that it defines. That function is expected to produce a Parsl configuration object,
    a number of workers to use for the computation,
    and a dictionary of settings to pass to the ASE calculator.

    Args:
        path: Path to the configuration file
        function_name: Which function from the configuration file to call.
    Returns:
        - Parsl configuration
        - Worker count
        - ASE option dictionary
    Raises:
        ValueError: If the file does not define ``function_name``
        TypeError: If ``function_name`` is defined in the file but is not callable
    """

    path = Path(path)

    # NOTE(security): this runs arbitrary Python code from the file.
    #  Only load configuration files which come from a trusted source.
    # Compile from bytes so that Python's own source-decoding rules apply
    #  (UTF-8 unless the file declares otherwise) instead of the locale encoding,
    #  and pass the file name so tracebacks point at the configuration file.
    spec_ns = {}
    exec(compile(path.read_bytes(), str(path), 'exec'), spec_ns)
    if function_name not in spec_ns:
        raise ValueError(f'Cannot find the function "{function_name}" in {path}')

    # Fail with a message that names the offending object, not a bare "not callable" error
    make_fun = spec_ns[function_name]
    if not callable(make_fun):
        raise TypeError(f'"{function_name}" in {path} is not callable')

    # Execute it
    return make_fun()
15 changes: 10 additions & 5 deletions jitterbug/thinkers/exact.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from colmena.queue import ColmenaQueues
from colmena.thinker import BaseThinker, ResourceCounter, agent, result_processor

from jitterbug.utils import read_from_string


class ExactHessianThinker(BaseThinker):
"""Schedule the calculation of a complete set of numerical derivatives"""
Expand Down Expand Up @@ -132,11 +134,14 @@ def store_energy(self, result: Result):
return

calc_type = result.task_info['type']
atoms = read_from_string(result.value, 'json')
energy = atoms.get_potential_energy()

# Store unperturbed energy
if calc_type == 'unperturbed':
self.logger.info('Storing energy of unperturbed structure')
self.unperturbed_energy = result.value
self.energy_path.write_text(str(result.value))
self.unperturbed_energy = energy
self.energy_path.write_text(str(energy))
return

# Store perturbed energy
Expand All @@ -151,13 +156,13 @@ def store_energy(self, result: Result):

with energy_file.open('a') as fp:
csv_writer = writer(fp)
csv_writer.writerow(coord + [result.value])
csv_writer.writerow(coord + [energy])

energies[tuple(coord)] = result.value
energies[tuple(coord)] = energy
if calc_type == 'double':
sym_coord = list(coord)
sym_coord[:3], sym_coord[3:] = coord[3:], coord[:3]
energies[tuple(sym_coord)] = result.value
energies[tuple(sym_coord)] = energy

def compute_hessian(self) -> np.ndarray:
"""Compute the Hessian using finite differences
Expand Down
34 changes: 33 additions & 1 deletion jitterbug/utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""Utility functions"""
from typing import Optional

from io import StringIO

from ase.calculators.calculator import Calculator
from ase.calculators.mopac import MOPAC
from ase.calculators.psi4 import Psi4
from ase import Atoms, io

mopac_methods = ['pm7']
"""List of methods for which we will use MOPAC"""
Expand Down Expand Up @@ -38,3 +39,34 @@ def make_calculator(method: str, basis: Optional[str], **kwargs) -> Calculator:
return XTB()
else:
return Psi4(method=method, basis=basis, **kwargs)


# Taken from ExaMol
def write_to_string(atoms: Atoms, fmt: str, **kwargs) -> str:
    """Serialize an ASE atoms object into a string

    Args:
        atoms: Structure to write
        fmt: Target format
        kwargs: Passed to the write function
    Returns:
        Structure written in target format
    """

    # Let the atoms object write into an in-memory buffer, then hand back its contents
    with StringIO() as buffer:
        atoms.write(buffer, fmt, **kwargs)
        return buffer.getvalue()


def read_from_string(atoms_msg: str, fmt: str) -> Atoms:
    """Parse an ASE atoms object out of a string

    Args:
        atoms_msg: String format of the object to read
        fmt: Format (cannot be autodetected)
    Returns:
        Parsed atoms object
    """

    # str() ensures that Proxies are resolved
    message = str(atoms_msg)
    return io.read(StringIO(message), format=fmt)
5 changes: 5 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
_file_dir = Path(__file__).parent / 'files'


@fixture()
def file_dir():
    """Directory holding the files used by the tests"""
    return _file_dir


@fixture()
def xyz_path():
    """Path to the ``water.xyz`` file inside the test file directory"""
    water_file = _file_dir / 'water.xyz'
    return water_file
5 changes: 5 additions & 0 deletions tests/files/example_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from parsl import Config, HighThroughputExecutor


def make_config():
    """Build the example configuration

    Returns:
        - Parsl configuration with a single executor limited to one worker
        - Worker count (1)
        - Empty option dictionary
    """
    executor = HighThroughputExecutor(max_workers=1)
    config = Config(executors=[executor])
    num_workers, options = 1, {}
    return config, num_workers, options
12 changes: 11 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,14 @@ def test_exact_solver(xyz_path):
main([
str(xyz_path), '--exact', '--method', 'pm7', 'None'
])
assert (Path('run') / 'water' / 'pm7_none' / 'hessian.npy').exists()
assert (Path('run') / 'water' / 'pm7_none_exact' / 'hessian.npy').exists()


def test_parsl_path(xyz_path, file_dir):
    """Run the CLI with an explicit Parsl configuration file and check that a Hessian is written"""
    config_path = file_dir / 'example_config.py'
    cli_args = [
        str(xyz_path), '--exact', '--method', 'pm7', 'None',
        '--parsl-config', str(config_path)
    ]

    # Discard anything the CLI prints to stdout
    with open(devnull, 'w') as sink, redirect_stdout(sink):
        main(cli_args)

    hessian_path = Path('run') / 'water' / 'pm7_none_exact' / 'hessian.npy'
    assert hessian_path.exists()
21 changes: 19 additions & 2 deletions tests/test_parsl.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,25 @@
from pathlib import Path

from ase.io import read

from jitterbug.parsl import get_energy
from jitterbug.parsl import get_energy, load_configuration
from jitterbug.utils import read_from_string


def test_energy(xyz_path):
mopac_out = Path('mopac.out')
mopac_out.unlink(missing_ok=True)

atoms = read(xyz_path)
get_energy(atoms, 'pm7', None)
atoms_msg = get_energy(atoms, 'pm7', None)
new_atoms = read_from_string(atoms_msg, 'json')
assert 'energy' in new_atoms.calc.results

assert not mopac_out.exists()


def test_load(file_dir):
    """Load the example configuration file and check each of the three returned values"""
    example_path = file_dir / 'example_config.py'
    config, workers, options = load_configuration(example_path)

    first_executor = config.executors[0]
    assert first_executor.max_workers == 1
    assert workers == 1
    assert options == {}
7 changes: 4 additions & 3 deletions tests/test_thinkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from colmena.queue.python import PipeQueues
from colmena.task_server.parsl import ParslTaskServer
from parsl import Config, HighThroughputExecutor
from pytest import fixture
from pytest import fixture, mark

from jitterbug.compare import compare_hessians
from jitterbug.parsl import get_energy
Expand Down Expand Up @@ -36,7 +36,7 @@ def task_server(queues):
update_wrapper(energy_func, get_energy)

# Make the task server
config = Config(executors=[HighThroughputExecutor(max_workers=1)])
config = Config(executors=[HighThroughputExecutor(max_workers=2)])
server = ParslTaskServer([energy_func], queues, config)

# Run and then kill when tests are complete
Expand All @@ -46,13 +46,14 @@ def task_server(queues):
server.join()


@mark.timeout(60)
def test_exact(xyz_path, queues, tmpdir, ase_hessian):
# Make the thinker
atoms = read(xyz_path)
run_path = Path(tmpdir) / 'run'
thinker = ExactHessianThinker(
queues=queues,
num_workers=1,
num_workers=2,
atoms=atoms,
run_dir=run_path,
)
Expand Down

0 comments on commit fa9a179

Please sign in to comment.