From 9465f6e46335ae3eea3be790f73026558c3c4cda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gordon=20J=2E=20K=C3=B6hn?= Date: Fri, 20 Dec 2024 14:02:49 +0100 Subject: [PATCH] add metadata to vpipe module --- .gitignore | 3 +++ scripts/vp_transformer.py | 26 +------------------------- src/sr2silo/__init__.py | 4 ++++ src/sr2silo/vpipe/__init__.py | 4 ++++ src/sr2silo/vpipe/metadata.py | 28 ++++++++++++++++++++++++++++ tests/vpipe/test_metadata.py | 18 ++++++++++++++++++ 6 files changed, 58 insertions(+), 25 deletions(-) create mode 100644 src/sr2silo/vpipe/__init__.py create mode 100644 src/sr2silo/vpipe/metadata.py create mode 100644 tests/vpipe/test_metadata.py diff --git a/.gitignore b/.gitignore index 1faa6fe..1691913 100644 --- a/.gitignore +++ b/.gitignore @@ -136,3 +136,6 @@ poetry.lock # Output folder output + +# Secret files +secrets diff --git a/scripts/vp_transformer.py b/scripts/vp_transformer.py index 88c395f..ae3b039 100644 --- a/scripts/vp_transformer.py +++ b/scripts/vp_transformer.py @@ -18,6 +18,7 @@ from sr2silo.process import pair_normalize_reads from sr2silo.s3 import compress_bz2, upload_file_to_s3 from sr2silo.translation import translate +from sr2silo.vpipe.metadata import sample_id_decoder logging.basicConfig( level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s" @@ -37,31 +38,6 @@ def load_config(config_file: Path) -> dict: raise -def sample_id_decoder(sample_id: str) -> dict: - """Decode the sample ID into individual components. - - Args: - sample_id (str): The sample ID to decode. - - Returns: - dict: A dictionary containing the decoded components. 
- containing the following keys: - - sequencing_well_position (str : sequencing well position) - - location_code (int : code of the location) - - sampling_date (str : date of the sampling) - """ - components = sample_id.split("_") - # Assign components to meaningful variable names - well_position = components[0] # A1 - location_code = components[1] # 10 - sampling_date = f"{components[2]}-{components[3]}-{components[4]}" # 2024-09-30 - return { - "sequencing_well_position": well_position, - "location_code": location_code, - "sampling_date": sampling_date, - } - - def batch_id_decoder(batch_id: str) -> dict: """Decode the batch ID into individual components. diff --git a/src/sr2silo/__init__.py b/src/sr2silo/__init__.py index 775d0ba..bbab54f 100644 --- a/src/sr2silo/__init__.py +++ b/src/sr2silo/__init__.py @@ -5,4 +5,8 @@ """sr2silo connects pairs, normalizes reads, and converts BAM to SAM files.""" from __future__ import annotations +import sr2silo.vpipe as vpipe + __version__ = "0.0.2" + +__all__ = ["vpipe"] diff --git a/src/sr2silo/vpipe/__init__.py b/src/sr2silo/vpipe/__init__.py new file mode 100644 index 0000000..397901f --- /dev/null +++ b/src/sr2silo/vpipe/__init__.py @@ -0,0 +1,4 @@ +"""Implements V-Pipe specific utilities. + + i.e. extracting metadata from V-Pipe Filenaming Conventions. + """ diff --git a/src/sr2silo/vpipe/metadata.py b/src/sr2silo/vpipe/metadata.py new file mode 100644 index 0000000..8dee106 --- /dev/null +++ b/src/sr2silo/vpipe/metadata.py @@ -0,0 +1,28 @@ +"""Extract metadata from V-Pipe Filenaming Conventions.""" + +from __future__ import annotations + + +def sample_id_decoder(sample_id: str) -> dict: + """Decode the sample ID into individual components. + + Args: + sample_id (str): The sample ID to decode. + + Returns: + dict: A dictionary containing the decoded components. 
+ The dictionary has the following keys: + - sequencing_well_position (str : sequencing well position) + - location_code (str : code of the location) + - sampling_date (str : date of the sampling) + """ + components = sample_id.split("_") + # Assign components to meaningful variable names + well_position = components[0] # A1 + location_code = components[1] # 10 + sampling_date = f"{components[2]}-{components[3]}-{components[4]}" # 2024-09-30 + return { + "sequencing_well_position": well_position, + "location_code": location_code, + "sampling_date": sampling_date, + } diff --git a/tests/vpipe/test_metadata.py b/tests/vpipe/test_metadata.py new file mode 100644 index 0000000..156c287 --- /dev/null +++ b/tests/vpipe/test_metadata.py @@ -0,0 +1,18 @@ +"""Implement tests for the metadata extraction functions.""" + + +from __future__ import annotations + +from sr2silo.vpipe.metadata import sample_id_decoder + + +def test_sample_id_decoder(): + """Test the sample_id_decoder function.""" + sample_id = "A1_10_2024_09_30" + result = sample_id_decoder(sample_id) + expected = { + "sequencing_well_position": "A1", + "location_code": "10", + "sampling_date": "2024-09-30", + } + assert result == expected