diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..36f1420 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,29 @@ +# Use the official Miniconda3 image, pinned to a fixed tag so rebuilds are reproducible +FROM continuumio/miniconda3:24.7.1-0 + +# Set the working directory in the container +WORKDIR /app + +# Copy only environment.yml first so the dependency layer stays cached when sources change +COPY environment.yml /app/environment.yml + +# Create the conda environment from environment.yml and drop conda caches in the same layer +RUN conda env create -f environment.yml && conda clean -afy + +# Make RUN commands use the new environment +SHELL ["conda", "run", "-n", "sr2silo", "/bin/bash", "-c"] + +# Copy the current directory contents into the container at /app +COPY . /app + +# Install the sr2silo package (editable) without keeping the pip download cache +RUN pip install --no-cache-dir -e . + +# Document port 80; EXPOSE alone does not publish it, map it at run time if needed +EXPOSE 80 + +# Define environment variable +ENV NAME=sr2silo + +# Activate the environment, then exec vp_daemon.py so it replaces the shell and receives container signals directly +ENTRYPOINT ["bash", "-c", "source activate sr2silo && exec python scripts/vp_daemon.py --config /app/scripts/vp_config.json"] diff --git a/README.md b/README.md index d11190a..1846758 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,9 @@ This project will wrangle short-read genomic alignments, for example from wastewater-sampling, into a format for easy import into the SILO sequencing database. +### Usage of the V-Pipe Daemon +`sr2silo` provides a daemon to process files as they arrive. See `scripts/README.md` for details. + ## Project Organization - `.github/workflows`: Contains GitHub Actions used for building, testing, and publishing. 
diff --git a/docker-compose.env b/docker-compose.env new file mode 100644 index 0000000..e89418d --- /dev/null +++ b/docker-compose.env @@ -0,0 +1,4 @@ +SAMPLE_DIR=../../../data/sr2silo/daemon_test/samples +TIMELINE_FILE=../../../data/sr2silo/daemon_test/timeline.tsv +DATABASE_DIR=database +BACKUP_DIR=backups diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..b8a9dbc --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,24 @@ +version: '3.8' + +services: + sr2silo: + build: . + # Volume sources come from docker-compose.env; the targets are the paths seen inside the container + volumes: + - ${SAMPLE_DIR}:/app/samples + - ${TIMELINE_FILE}:/app/timeline.tsv + - ${DATABASE_DIR}:/app/database + - ${BACKUP_DIR}:/app/backups + - ./scripts/vp_config.json:/app/scripts/vp_config.json + # Paths below are container paths (the mount targets above), not the host-side values from docker-compose.env + environment: + - PYTHONUNBUFFERED=1 + - SAMPLE_DIR=/app/samples + - TIMELINE_FILE=/app/timeline.tsv + - DATABASE_FILE=/app/database/processed_files.db + - BACKUP_DIR=/app/backups + # No command override: the image ENTRYPOINT (bash -c "...") already starts scripts/vp_daemon.py, and extra arguments would be ignored + +volumes: + database: + backups: diff --git a/scripts/README.md b/scripts/README.md index 95421b5..f635ff6 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -2,9 +2,9 @@ This directory contains scripts for processing and managing sample data. Below is an explanation of the two main scripts in this directory. -## vp_deamon.py +## vp_daemon.py -`vp_deamon.py` is a daemon script that processes new samples from the timeline file and stores the processed samples in the result directory. It performs the following tasks: +`vp_daemon.py` is a daemon script that processes new samples from the timeline file and stores the processed samples in the result directory. It performs the following tasks: 1. **Load Configuration**: Loads configuration settings from a JSON file using Pydantic for validation. 2. **Initialize Database**: Initializes a SQLite database to keep track of processed samples. 
@@ -17,20 +17,24 @@ This directory contains scripts for processing and managing sample data. Below i To run the daemon script, execute the following command: ```sh -python vp_deamon.py --config scripts/vp_config.json +python scripts/vp_daemon.py --config scripts/vp_config.json ``` -Ensure that the configuration file vp_config.json is present in the scripts directory with the necessary settings. +Run the command from the repository root, and ensure that the configuration file `vp_config.json` is present in the `scripts` directory with the necessary settings. ## vp_transformer.py -`vp_transformer.py` is a script that contains the core processing logic for transforming sample data. This script is used by `vp_deamon.py` to process new samples. -## Usage -This script is typically not run directly. Instead, it is imported and used by `vp_deamon.py`. +`vp_transformer.py` is a script that contains the core processing logic for transforming sample data. This script is used by `vp_daemon.py` to process new samples. + +### Usage + +This script is typically not run directly. Instead, it is imported and used by `vp_daemon.py`. ## Legacy Notice + The core processing logic in these scripts is based on the dgivec scripts, which were the foundation of this package. These scripts are retained here for legacy reasons and to ensure compatibility with existing workflows. ## Configuration + The configuration file `vp_config.json` should have the following structure: ```json @@ -41,15 +45,25 @@ The configuration file `vp_config.json` should have the following structure: "nextclade_reference": "The reference to use for nextclade.", "database_file": "The path to the database file.", "backup_dir": "The directory where the backups are stored.", - "deamon_interval_m": "The interval in minutes to run the daemon." + "daemon_interval_m": "The interval in minutes to run the daemon." } ``` + - `sample_dir`: The directory where the samples are stored. - `timeline_file`: The path to the timeline file. 
- `result_dir`: The directory where the results are stored. - `nextclade_reference`: The reference to use for nextclade. - `database_file`: The path to the database file. - `backup_dir`: The directory where the backups are stored. -- `deamon_interval_m`: The interval in minutes to run the daemon. +- `daemon_interval_m`: The interval in minutes to run the daemon. Ensure that all paths are correctly set in the configuration file before running the scripts. + +## Run from Docker + +Set the volume sources in `docker-compose.env`; Compose mounts them at the container paths that `scripts/vp_config.json` refers to (for example `/app/samples`). Then build and run with: + +```sh +docker-compose --env-file docker-compose.env build +docker-compose --env-file docker-compose.env up +``` diff --git a/scripts/vp_config.json b/scripts/vp_config.json index 57b4c82..fc39f92 100644 --- a/scripts/vp_config.json +++ b/scripts/vp_config.json @@ -1,9 +1,9 @@ { - "sample_dir": "../../../data/sr2silo/deamon_test/samples/", - "result_dir": "deamon_test/results/", - "timeline_file": "../../../data/sr2silo/deamon_test/timeline.tsv", + "sample_dir": "/app/samples", + "result_dir": "daemon_test/results/", + "timeline_file": "/app/timeline.tsv", "nextclade_reference": "sars-cov-2", - "database_file": "processed_files.db", + "database_file": "/app/database/processed_files.db", "backup_dir": "backups/", "deamon_interval_m": 1 } diff --git a/scripts/vp_deamon.py b/scripts/vp_daemon.py similarity index 100% rename from scripts/vp_deamon.py rename to scripts/vp_daemon.py