Skip to content

Commit

Permalink
add metadata to vpipe module
Browse files Browse the repository at this point in the history
  • Loading branch information
Gordon J. Köhn committed Dec 20, 2024
1 parent bb5dff4 commit 9465f6e
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 25 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,6 @@ poetry.lock

# Output folder
output

# Secret files
secrets
26 changes: 1 addition & 25 deletions scripts/vp_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from sr2silo.process import pair_normalize_reads
from sr2silo.s3 import compress_bz2, upload_file_to_s3
from sr2silo.translation import translate
from sr2silo.vpipe.metadata import sample_id_decoder

logging.basicConfig(
level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
Expand All @@ -37,31 +38,6 @@ def load_config(config_file: Path) -> dict:
raise


def sample_id_decoder(sample_id: str) -> dict:
    """Decode the sample ID into individual components.

    The ID is split on underscores and read positionally as
    ``<well>_<location>_<year>_<month>_<day>``, for example
    ``"A1_10_2024_09_30"``. Components after the fifth are ignored, and no
    component is converted or checked beyond being present.

    Args:
        sample_id (str): The sample ID to decode.

    Returns:
        dict: A dictionary containing the decoded components, all as strings,
            under the following keys:
            - sequencing_well_position (str : sequencing well position)
            - location_code (str : code of the location, not converted to int)
            - sampling_date (str : date of the sampling, "YYYY-MM-DD" style)

    Raises:
        ValueError: If ``sample_id`` has fewer than five underscore-separated
            components.
    """
    expected_components = 5
    components = sample_id.split("_")
    # Fail with a message naming the offending ID instead of an opaque
    # "list index out of range" from positional indexing.
    if len(components) < expected_components:
        raise ValueError(
            f"Cannot decode sample ID {sample_id!r}: expected at least "
            f"{expected_components} underscore-separated components, "
            f"found {len(components)}."
        )
    # e.g. "A1", "10", "2024", "09", "30"
    well_position, location_code, year, month, day = components[
        :expected_components
    ]
    return {
        "sequencing_well_position": well_position,
        "location_code": location_code,
        "sampling_date": f"{year}-{month}-{day}",
    }


def batch_id_decoder(batch_id: str) -> dict:
"""Decode the batch ID into individual components.
Expand Down
4 changes: 4 additions & 0 deletions src/sr2silo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,8 @@
"""sr2silo connects pairs, normalizes reads, and converts BAM to SAM files."""
from __future__ import annotations

# Import the V-Pipe subpackage under the name listed in ``__all__`` below.
import sr2silo.vpipe as vpipe

# Package version string.
__version__ = "0.0.2"

# Names exported by ``from sr2silo import *``.
__all__ = ["vpipe"]
4 changes: 4 additions & 0 deletions src/sr2silo/vpipe/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""Implements V-Pipe specific utilities.
i.e. extracting metadata from V-Pipe Filenaming Conventions.
"""
28 changes: 28 additions & 0 deletions src/sr2silo/vpipe/metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Extract metadata from V-Pipe Filenaming Conventions."""

from __future__ import annotations


def sample_id_decoder(sample_id: str) -> dict:
    """Decode the sample ID into individual components.

    The ID is split on underscores and read positionally as
    ``<well>_<location>_<year>_<month>_<day>``, for example
    ``"A1_10_2024_09_30"``. Components after the fifth are ignored, and no
    component is converted or checked beyond being present.

    Args:
        sample_id (str): The sample ID to decode.

    Returns:
        dict: A dictionary containing the decoded components, all as strings,
            under the following keys:
            - sequencing_well_position (str : sequencing well position)
            - location_code (str : code of the location, not converted to int)
            - sampling_date (str : date of the sampling, "YYYY-MM-DD" style)

    Raises:
        ValueError: If ``sample_id`` has fewer than five underscore-separated
            components.
    """
    expected_components = 5
    components = sample_id.split("_")
    # Fail with a message naming the offending ID instead of an opaque
    # "list index out of range" from positional indexing.
    if len(components) < expected_components:
        raise ValueError(
            f"Cannot decode sample ID {sample_id!r}: expected at least "
            f"{expected_components} underscore-separated components, "
            f"found {len(components)}."
        )
    # e.g. "A1", "10", "2024", "09", "30"
    well_position, location_code, year, month, day = components[
        :expected_components
    ]
    return {
        "sequencing_well_position": well_position,
        "location_code": location_code,
        "sampling_date": f"{year}-{month}-{day}",
    }
18 changes: 18 additions & 0 deletions tests/vpipe/test_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Implement tests for the metadata extraction functions."""


from __future__ import annotations

from sr2silo.vpipe.metadata import sample_id_decoder


def test_sample_id_decoder():
    """Check that a well-formed sample ID decodes into its three fields."""
    decoded = sample_id_decoder("A1_10_2024_09_30")
    # All values are expected as strings, including the location code.
    assert decoded == {
        "sequencing_well_position": "A1",
        "location_code": "10",
        "sampling_date": "2024-09-30",
    }

0 comments on commit 9465f6e

Please sign in to comment.