diff --git a/docs/simulator.md b/docs/simulator.md
new file mode 100644
index 00000000..c3dc4f2b
--- /dev/null
+++ b/docs/simulator.md
@@ -0,0 +1,66 @@
+# Simulator
+
+`ispyb.simulate` creates a new `DataCollection` row in the ISPyB database from a simple YAML definition. It creates the data collection, the related sample information, and the associated shipping entities, then copies the raw data and associated snapshots (and thumbnails) into place.
+
+Simulate a data collection:
+
+```bash
+ispyb.simulate
+ispyb.simulate bm23 energy_scan1
+```
+
+If entities with the configured names do not already exist, the simulator hierarchically creates a component (`Protein`), a related `BLSample` (with an intermediate `Crystal`), and optionally a `BLSubSample`, contained within a `Container`, `Dewar`, and `Shipping` belonging to the specified `Proposal`. It then creates a `DataCollection` and `DataCollectionGroup`, linked to the relevant `BLSample` and `BLSession`. If grid info is specified, it also creates an entry in `GridInfo`.
+
+## Configuration
+
+The configuration file location is defined via the `SIMULATION_CONFIG` environment variable. An example configuration is available in `examples/simulation.yml`. The structure and requirements of this file are documented in the example.
+
+Each entry in `experiments` represents a different data collection. The `experimentType` key maps to `DataCollectionGroup.experimentType`, so it must match one of the types available in the database. See the [experimentType](https://github.com/ispyb/ispyb-database/blob/main/schema/1_tables.sql#L1518) column definition for the full list.
+
+## Available columns per table
+
+The ISPyB tables are large, so only a subset of their columns is exposed by this simulator: the ones most pertinent to creating usable data collections and associated entries. They are listed below for each table.
+
+### Component (Protein)
+
+- acronym
+- name
+- sequence
+- density
+- molecularMass
+- description
+
+### BLSample
+
+- name
+
+### BLSubSample
+
+- x
+- y
+- x2
+- y2
+- type
+
+### DataCollection
+
+- imageContainerSubPath
+- numberOfImages
+- wavelength
+- exposureTime
+- xtalSnapshotFullPath1-4
+
+### GridInfo
+
+- steps_x
+- steps_y
+- snapshot_offsetXPixel
+- snapshot_offsetYPixel
+- dx_mm
+- dy_mm
+- pixelsPerMicronX
+- pixelsPerMicronY
+
+## Plugins
+
+The simulator can trigger events before and after the data is copied via the `ispyb.simulator.before_datacollection` and `ispyb.simulator.after_datacollection` entry points. Each registered plugin is called with just the new `DataCollection.dataCollectionId`.
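+
+For example, a plugin package could expose hook functions along the lines of the minimal sketch below and register them under these entry point groups in its own packaging metadata. The module and function names (`my_plugins.hooks`, `notify_start`, `notify_end`) are placeholders, not part of py-ispyb.
+
+```python
+# my_plugins/hooks.py (hypothetical plugin module)
+#
+# Registered by the plugin package, e.g. in its setup.cfg:
+#
+#   [options.entry_points]
+#   ispyb.simulator.before_datacollection =
+#       notify = my_plugins.hooks:notify_start
+#   ispyb.simulator.after_datacollection =
+#       notify = my_plugins.hooks:notify_end
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def notify_start(dataCollectionId: int) -> None:
+    # Called just before the simulator starts copying the raw data
+    logger.info("Simulated data collection %s starting", dataCollectionId)
+
+
+def notify_end(dataCollectionId: int) -> None:
+    # Called once the data has been copied and any --delay has elapsed
+    logger.info("Simulated data collection %s finished", dataCollectionId)
+```
+
+Each hook receives the `dataCollectionId` as its only positional argument, matching how the simulator invokes the loaded entry points.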
diff --git a/examples/simulation.yml b/examples/simulation.yml
new file mode 100644
index 00000000..69aa86ef
--- /dev/null
+++ b/examples/simulation.yml
@@ -0,0 +1,91 @@
+# Whether to link or copy data
+copy_method: copy
+
+# Map each beamline to a session
+sessions:
+  bl: blc00001-1
+
+# Where to copy raw data from
+raw_data: /data/ispyb-test
+
+# Where to write simulated data to, can use {beamline} placeholder
+data_dir: /data/tests/{beamline}/simulation
+
+ispyb_url: https://ispyb.esrf.fr
+
+# Define Components (Proteins)
+components:
+  # an internal reference for the component
+  comp1:
+    # columns to populate for this component
+    acronym: Component1
+    sequence: SiSP
+    molecularMass: 12.5
+
+  comp2:
+    acronym: Component2
+
+# Define BLSamples
+samples:
+  # an internal reference for this sample
+  samp1:
+    # columns to populate for this sample
+    name: Sample1
+    # which component this sample is an instance of (one of the keys in components above)
+    component: comp1
+
+  samp2:
+    name: Sample2
+    component: comp2
+
+# Define Experiments (DataCollections)
+experiments:
+  # a short name for this experiment (available via the CLI)
+  energy_scan1:
+    # the experimentType, must map to a valid type in DataCollectionGroup.experimentType
+    experimentType: OSC
+    # data will be split into its respective imageDirectory and fileTemplate columns
+    data: osc/oscillation.h5
+    # which sample to link this data collection to (one of the keys in samples above)
+    sample: samp1
+
+    # columns to populate
+    # xtalSnapshot thumbnails should have a trailing t
+    # Fullsize image: osc/snapshot1.png
+    # Thumbnail: osc/snapshot1t.png
+    xtalSnapshotFullPath1: osc/snapshot1.png
+    numberOfImages: 4001
+    exposureTime: 1
+    #energy: 8.8143
+    wavelength: 1.4065
+    imageContainerSubPath: 1.1/measurement
+
+  xrf_map1:
+    experimentType: Mesh
+    data: mesh/mesh.h5
+    sample: samp1
+
+    xtalSnapshotFullPath1: mesh/snapshot1.png
+    numberOfImages: 1600
+    exposureTime: 0.03
+    #energy: 2.4817
+    wavelength: 4.9959
+
+    # additionally populate GridInfo
+    grid:
+      steps_x: 40
+      steps_y: 40
+      dx_mm: 0.001
+      dy_mm: 0.001
+      pixelsPerMicronX: -0.44994
+      pixelsPerMicronY: -0.46537
+      snapshot_offsetXPixel: 682.16
+      snapshot_offsetYPixel: 554
+
+    # additionally populate BLSubSample
+    subsample:
+      x: 9038007
+      y: 24467003
+      x2: 9078007
+      y2: 24507003
+      type: roi
diff --git a/mkdocs.yml b/mkdocs.yml
index 29065eec..c2cbb937 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -8,6 +8,7 @@ nav:
   - Routes: routes.md
   - ⧉ Tests coverage: https://app.codecov.io/gh/ispyb/py-ispyb/
   - ⧉ Endpoints documentation: https://ispyb.github.io/py-ispyb/api/
+  - Simulator: simulator.md
 theme:
   name: material
   features:
diff --git a/pyispyb/app/extensions/database/definitions.py b/pyispyb/app/extensions/database/definitions.py
index 3c379833..c857a993 100644
--- a/pyispyb/app/extensions/database/definitions.py
+++ b/pyispyb/app/extensions/database/definitions.py
@@ -14,6 +14,10 @@
     models.BLSession.visit_number,
 ).label("session")
 
+_proposal = sqlalchemy.func.concat(
+    models.Proposal.proposalCode, models.Proposal.proposalNumber
+).label("proposal")
+
 
 def get_blsession(session: str) -> Optional[models.BLSession]:
     return (
diff --git a/pyispyb/config.py b/pyispyb/config.py
index f6dd2dab..ace66654 100644
--- a/pyispyb/config.py
+++ b/pyispyb/config.py
@@ -65,6 +65,8 @@ class Settings(BaseSettings):
 
     cors: bool = False
 
+    simulation_config: str = None
+
     class Config:
         env_file = get_env_file()
diff --git a/pyispyb/simulation/__init__.py b/pyispyb/simulation/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/pyispyb/simulation/base.py b/pyispyb/simulation/base.py
new file mode 100644
index 00000000..4d45722b
--- /dev/null
+++ b/pyispyb/simulation/base.py
@@ -0,0 +1,79 @@
+from abc import ABC, abstractmethod
+from contextlib import contextmanager
+import logging
+import os
+import pkg_resources
+from typing import Any
+
+import yaml
+
+from ..config import settings
+from ..app.extensions.database.session import _session
+
+logger = logging.getLogger(__name__)
+
+
+def load_config() -> dict[str, Any]:
+    if not settings.simulation_config:
+        raise RuntimeError("`SIMULATION_CONFIG` environment variable is not defined")
+
+    if not os.path.exists(settings.simulation_config):
+        raise AttributeError(f"Cannot find config file: `{settings.simulation_config}`")
+
+    config = {}
+    with open(settings.simulation_config, "r") as stream:
+        config = yaml.safe_load(stream)
+
+    return config
+
+
+class Simulation(ABC):
+    def __init__(self):
+        self._config = load_config()
+
+    @property
+    def config(self) -> dict[str, Any]:
+        return self._config
+
+    @contextmanager
+    def session(self):
+        db_session = _session()
+        try:
+            yield db_session
+            db_session.commit()
+        except Exception as e:  # noqa
+            db_session.rollback()
+            raise
+        finally:
+            db_session.close()
+
+    @property
+    def beamlines(self) -> list[str]:
+        return list(self.config["sessions"].keys())
+
+    @property
+    def experiment_types(self) -> list[str]:
+        return list(self.config["experiments"].keys())
+
+    def before_start(self, dataCollectionId: int) -> None:
+        for entry in pkg_resources.iter_entry_points(
+            "ispyb.simulator.before_datacollection"
+        ):
+            fn = entry.load()
+            logger.info(f"Executing before start plugin `{entry.name}`")
+            fn(dataCollectionId)
+
+    def after_end(self, dataCollectionId: int) -> None:
+        for entry in pkg_resources.iter_entry_points(
+            "ispyb.simulator.after_datacollection"
+        ):
+            fn = entry.load()
+            logger.info(f"Executing after end plugin `{entry.name}`")
+            fn(dataCollectionId)
+
+    def do_run(self, *args, **kwargs) -> None:
+        self.run(*args, **kwargs)
+
+    @abstractmethod
+    def run(self, *args, **kwargs) -> None:
+        pass
diff --git a/pyispyb/simulation/cli.py b/pyispyb/simulation/cli.py
new file mode 100644
index 00000000..b4a6b7f0
--- /dev/null
+++ b/pyispyb/simulation/cli.py
@@ -0,0 +1,51 @@
+import argparse
+import logging
+
+from .datacollection import SimulateDataCollection
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+def run() -> None:
+    try:
+        sdc = SimulateDataCollection()
+    except (AttributeError, RuntimeError) as e:
+        exit(f"Simulation Error: {e}")
+
+    parser = argparse.ArgumentParser(description="ISPyB simulation tool")
+    parser.add_argument(
+        "beamline", help="Beamline to run simulation against", choices=sdc.beamlines
+    )
+
+    parser.add_argument(
+        "experiment", help="Experiment to simulate", choices=sdc.experiment_types
+    )
+
+    parser.add_argument(
+        "--delay",
+        default=5,
+        type=int,
+        dest="delay",
+        help="Delay between plugin start and end events",
+    )
+    parser.add_argument(
+        "--debug",
+        action="store_true",
+        help="Enable debug output",
+    )
+
+    args = parser.parse_args()
+
+    root = logging.getLogger()
+    root.setLevel(level=logging.DEBUG if args.debug else logging.INFO)
+
+    try:
+        sdc.do_run(args.beamline, args.experiment, delay=args.delay)
+    except Exception as e:
+        if args.debug:
+            logger.exception("Simulation Error")
+            print(e)
+        else:
+            print(f"Simulation Error: {e}")
diff --git a/pyispyb/simulation/datacollection.py b/pyispyb/simulation/datacollection.py
new file mode 100644
index 00000000..d1fca4e0
--- /dev/null
+++ b/pyispyb/simulation/datacollection.py
@@ -0,0 +1,390 @@
+from datetime import datetime, timedelta
+import logging
+import os
+import shutil
+import time
+
+from ispyb import models
+
+from ..app.extensions.database.definitions import _proposal, _session
+from .base import Simulation
+
+
+logger = logging.getLogger(__name__)
+
+
+class SimulateDataCollection(Simulation):
+    def _get_container_position(
+        self, ses, blsession: str, proposalId: str, beamline: str
+    ) -> tuple[int, int]:
+        shipment_name = "Simulation_Shipment"
+        shipment = (
+            ses.query(models.Shipping)
+            .filter(models.Shipping.proposalId == proposalId)
+            .filter(models.Shipping.shippingName == shipment_name)
+            .first()
+        )
+
+        if not shipment:
+            logger.debug("Creating shipment")
+            shipment = models.Shipping(
+                shippingName=shipment_name,
+                proposalId=proposalId,
+                creationDate=datetime.now(),
+            )
+
+            ses.add(shipment)
+            ses.commit()
+
+        dewar_name = "Simulation_Dewar"
+        dewar = (
+            ses.query(models.Dewar.dewarId)
+            .filter(models.Dewar.shippingId == shipment.shippingId)
+            .filter(models.Dewar.code == dewar_name)
+            .first()
+        )
+
+        if not dewar:
+            logger.debug("Creating dewar")
+            dewar = models.Dewar(
+                shippingId=shipment.shippingId,
+                code=dewar_name,
+                dewarStatus="processing",
+            )
+            ses.add(dewar)
+            ses.commit()
+
+        container_name = "Simulation_Container"
+        container = (
+            ses.query(models.Container.containerId)
+            .filter(models.Container.dewarId == dewar.dewarId)
+            .filter(models.Container.code == container_name)
+            .first()
+        )
+
+        if not container:
+            logger.debug("Creating container")
+            container = models.Container(
+                dewarId=dewar.dewarId,
+                code=container_name,
+                containerType="Box",
+                capacity=25,
+                bltimeStamp=datetime.now(),
+                containerStatus="at facility",
+                # beamlineLocation=beamline,
+                # sampleChangerLocation=1,
+            )
+            ses.add(container)
+            ses.commit()
+
+            containerhistory = models.ContainerHistory(
+                containerId=container.containerId,
+                status="at facility",
+                location=1,
+                beamlineName=beamline,
+            )
+
+            ses.add(containerhistory)
+            ses.commit()
+
+        samples = (
+            ses.query(models.BLSample)
+            .filter(models.BLSample.containerId == container.containerId)
+            .all()
+        )
+        max_loc = 0
+        for s in samples:
+            if int(s.location) > max_loc:
+                max_loc = int(s.location)
+
+        return container.containerId, max_loc + 1
+
+    def run(self, beamline: str, experiment: str, delay=0):
+        blses: str = self.config["sessions"][beamline]
+
+        if experiment not in self.config["experiments"]:
+            raise KeyError(f"No such experiment {experiment}")
+
+        exp = self.config["experiments"][experiment]
+        data = os.path.join(self.config["raw_data"], exp["data"])
+
+        if not exp.get("experimentType"):
+            raise KeyError(
+                f"Experiment `{experiment}` does not specify `experimentType`"
+            )
+
+        if not os.path.exists(data):
+            raise AttributeError(f"Raw data file: `{data}` does not exist")
+
+        if not exp.get("sample"):
+            raise KeyError(f"No sample specified for experiment `{experiment}`")
+
+        if exp["sample"] not in self.config["samples"]:
+            raise KeyError(
+                f"Experiment sample `{exp['sample']}` is not defined in `samples`"
+            )
+
+        sample = self.config["samples"][exp["sample"]]
+
+        with self.session() as ses:
+            prop, blsession = (
+                ses.query(_proposal, models.BLSession)
+                .join(
+                    models.Proposal,
+                    models.Proposal.proposalId == models.BLSession.proposalId,
+                )
+                .filter(_session == blses)
+                .first()
+            )
+
+            blsample = (
+                ses.query(models.BLSample)
+                .filter(models.BLSample.name == sample["name"])
+                .first()
+            )
+
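+            # Create the sample hierarchy (Protein -> Crystal -> BLSample, plus
+            # Shipping / Dewar / Container) if no sample with this name exists yet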
+            if not blsample:
+                for k in ["component", "name"]:
+                    if not sample.get(k):
+                        raise KeyError(f"No {k} specified for sample {exp['sample']}")
+
+                if sample["component"] not in self.config["components"]:
+                    raise KeyError(
+                        f"Sample component {sample['component']} is not defined in `components`"
+                    )
+
+                comp = self.config["components"][sample["component"]]
+                for k in ["acronym"]:
+                    if not comp.get(k):
+                        raise KeyError(
+                            f"No {k} specified for component {sample['component']}"
+                        )
+
+                component = (
+                    ses.query(models.Protein)
+                    .filter(models.Protein.acronym == comp["acronym"])
+                    .first()
+                )
+
+                if not component:
+                    logger.info(f"Creating component {comp['acronym']}")
+                    component = models.Protein(
+                        proposalId=blsession.proposalId,
+                        acronym=comp.get("acronym"),
+                        name=comp.get("name", comp.get("acronym")),
+                        sequence=comp.get("sequence"),
+                        density=comp.get("density"),
+                        molecularMass=comp.get("molecularMass"),
+                        description="Simulated component",
+                    )
+                    ses.add(component)
+                    ses.commit()
+
+                crystal = models.Crystal(proteinId=component.proteinId)
+                ses.add(crystal)
+                ses.commit()
+
+                logger.info(f"Creating sample {sample['name']}")
+                containerid, position = self._get_container_position(
+                    ses, blses, blsession.proposalId, beamline
+                )
+                blsample = models.BLSample(
+                    name=sample["name"],
+                    crystalId=crystal.crystalId,
+                    location=position,
+                    containerId=containerid,
+                )
+                ses.add(blsample)
+                ses.commit()
+
+            subsampleid = None
+            if exp.get("subsample"):
+                logger.info("Creating subsample")
+                sub = exp["subsample"]
+
+                pos1id = None
+                if sub.get("x") and sub.get("y"):
+                    pos1 = models.Position(
+                        posX=sub["x"],
+                        posY=sub["y"],
+                    )
+                    ses.add(pos1)
+                    ses.commit()
+
+                    pos1id = pos1.positionId
+
+                pos2id = None
+                if sub.get("x2") and sub.get("y2"):
+                    pos2 = models.Position(
+                        posX=sub["x2"],
+                        posY=sub["y2"],
+                    )
+                    ses.add(pos2)
+                    ses.commit()
+
+                    pos2id = pos2.positionId
+
+                subsample = models.BLSubSample(
+                    positionId=pos1id,
+                    position2Id=pos2id,
+                    blSampleId=blsample.blSampleId,
+                    comments="Simulated sample",
+                )
+
+                if hasattr(subsample, "type"):
+                    subsample.type = sub.get("type")
+
+                ses.add(subsample)
+                ses.commit()
+
+                subsampleid = subsample.blSubSampleId
+
+            startTime = datetime.now()
+            endTime = datetime.now() + timedelta(minutes=5)
+
+            logger.debug("Creating datacollection group")
+            dcg = models.DataCollectionGroup(
+                sessionId=blsession.sessionId,
+                experimentType=exp["experimentType"],
+                blSampleId=blsample.blSampleId,
+                startTime=startTime,
+                endTime=endTime,
+            )
+            ses.add(dcg)
+            ses.commit()
+
+            logger.debug("Creating datacollection")
+            dc = models.DataCollection(
+                blSubSampleId=subsampleid,
+                dataCollectionGroupId=dcg.dataCollectionGroupId,
+                fileTemplate=os.path.basename(exp["data"]),
+                imageContainerSubPath=exp.get(
+                    "imageContainerSubPath", "1.1/measurement"
+                ),
+                numberOfImages=exp.get("numberOfImages"),
+                wavelength=exp.get("wavelength"),
+                exposureTime=exp.get("exposureTime"),
+                runStatus="Successful",
+                comments="Simulated datacollection",
+                startTime=startTime,
+                endTime=endTime,
+            )
+
+            # Deprecated
+            if hasattr(dc, "BLSAMPLEID"):
+                dc.BLSAMPLEID = blsample.blSampleId
+
+            ses.add(dc)
+            ses.commit()
+
+            if exp.get("grid"):
+                logger.debug("Creating gridinfo")
+                grid = models.GridInfo(
+                    steps_x=exp["grid"]["steps_x"],
+                    steps_y=exp["grid"]["steps_y"],
+                    dx_mm=exp["grid"]["dx_mm"],
+                    dy_mm=exp["grid"]["dy_mm"],
+                )
+
+                if hasattr(grid, "snapshot_offsetXPixel"):
+                    grid.snapshot_offsetXPixel = exp["grid"]["snapshot_offsetXPixel"]
+                    grid.snapshot_offsetYPixel = exp["grid"]["snapshot_offsetYPixel"]
+
+                if hasattr(grid, "pixelsPerMicronX"):
+                    grid.pixelsPerMicronX = exp["grid"]["pixelsPerMicronX"]
+                    grid.pixelsPerMicronY = exp["grid"]["pixelsPerMicronY"]
+
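+                # GridInfo.dataCollectionId is not available in every ISPyB schema
+                # version, so fall back to the deprecated DataCollectionGroup link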
exp["grid"]["snapshot_offsetYPixel"] + + if hasattr(grid, "pixelsPerMicronX"): + grid.pixelsPerMicronX = exp["grid"]["pixelsPerMicronX"] + grid.pixelsPerMicronY = exp["grid"]["pixelsPerMicronY"] + + if hasattr(grid, "dataCollectionId"): + grid.dataCollectionId = dc.dataCollectionId + # Deprecated but needed for pydb + else: + grid.dataCollectionGroupId = dcg.dataCollectionGroupId + + ses.add(grid) + ses.commit() + + logger.info(f"Created datacollection: `{dc.dataCollectionId}`") + logger.info( + f"{self.config['ispyb_url']}/visit/{blses}/id/{dc.dataCollectionId}" + ) + + logger.info("Triggering before start plugins") + self.before_start(dc.dataCollectionId) + + # Create the dataset dir + data_dir = os.path.join( + self.config["data_dir"].format(beamline=beamline), + prop, + exp["sample"], + f"{exp['sample']}_{dc.dataCollectionId}", + ) + + dc.imageDirectory = data_dir + ses.commit() + + if os.path.exists(data_dir): + logger.warning(f"Data directory already exists: {data_dir}") + + os.makedirs(data_dir) + if not os.path.exists(data_dir): + raise AttributeError( + f"Could not create output data directory: {data_dir}" + ) + + # Link data files / snapshots + link = self.config.get("copy_method", "copy") == "link" + if link: + logger.debug("Linking data") + os.link(data, os.path.join(data_dir, os.path.basename(data))) + else: + logger.debug("Copying data") + shutil.copy(data, os.path.join(data_dir, os.path.basename(data))) + + snapshot_path = os.path.join( + self.config["raw_data"], exp.get("xtalSnapshotFullPath1") + ) + if snapshot_path: + if os.path.exists(snapshot_path): + snapshot = os.path.join(data_dir, os.path.basename(snapshot_path)) + if link: + logger.debug("Linking snapshot") + os.link(snapshot_path, snapshot) + else: + logger.debug("Copying snapshot") + shutil.copy(snapshot_path, snapshot) + + snap, snap_extension = os.path.splitext(snapshot_path) + thumb = f"{snap}t{snap_extension}" + if os.path.exists(thumb): + if link: + logger.debug("Linking thumbnail") + os.link( + thumb, + os.path.join( + data_dir, + f"{os.path.basename(snap)}t{snap_extension}", + ), + ) + else: + logger.debug("Copying thumbnail") + shutil.copy( + thumb, + os.path.join( + data_dir, + f"{os.path.basename(snap)}t{snap_extension}", + ), + ) + else: + logger.warning(f"Snapshot thumbnail does not exist {thumb}") + + dc.xtalSnapshotFullPath1 = snapshot + else: + logger.warning(f"Snapshot file does not exist {snapshot_path}") + + logger.info(f"Finshed copying data to: {data_dir}") + + if delay: + time.sleep(delay) + + logger.info("Triggering after end plugins") + self.after_end(dc.dataCollectionId) diff --git a/setup.cfg b/setup.cfg index 5dde630d..59fb36a4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,7 +29,6 @@ package_dir = pyispyb = pyispyb install_requires = ispyb-models == 0.0.3 - python >= 3.10 fastapi pydantic[dotenv] uvicorn @@ -51,6 +50,10 @@ install_requires = exclude = tests +[options.entry_points] +console_scripts = + ispyb.simulate = pyispyb.simulation.cli:run + [bdist_wheel] universal = true