Skip to content

Commit

Permalink
feat: output read orientation fractions to json (#169)
Browse files Browse the repository at this point in the history
* feat: add read orient single json output

* feat: add read orient single json output

* update single orientation json

* feat: add read orient paired json output

* refactor: simplify read orientation functions

* add pylint exception

* minor refactor get_read_orientation
  • Loading branch information
balajtimate authored Oct 9, 2024
1 parent d3cbe0e commit a081c35
Showing 1 changed file with 112 additions and 10 deletions.
122 changes: 112 additions & 10 deletions htsinfer/get_read_orientation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import (Any, DefaultDict, Dict, List)

import pysam # type: ignore
import pandas as pd # type: ignore

from htsinfer.exceptions import (
FileProblem,
Expand Down Expand Up @@ -58,6 +59,7 @@ def __init__(
self.library_source = config.results.library_source
self.transcripts_file = config.args.t_file_processed
self.tmp_dir = config.args.tmp_dir
self.out_dir = config.args.out_dir
self.min_mapped_reads = config.args.read_orientation_min_mapped_reads
self.min_fraction = config.args.read_orientation_min_fraction
self.mapping = mapping
Expand Down Expand Up @@ -176,13 +178,20 @@ def process_single(
else:
orientation = StatesOrientation.unstranded

# write log messages and return result
orient_df = self.create_orient_df(
reads, fractions_all_states, orientation, paired=False
)

LOGGER.debug(
f"Required number of mapped reads pairs: {self.min_mapped_reads}"
f"Required number of mapped reads: {self.min_mapped_reads}"
)
LOGGER.debug(f"Number of reads mapped: {reads}")
LOGGER.debug(f"Fraction of states: {fractions_all_states}")
LOGGER.debug(f"Orientation: {orientation}")
LOGGER.debug(f"Number of mapped reads: {orient_df.iloc[0, 0]}")
LOGGER.debug(f"Fraction of SF: {orient_df.iloc[0, 1]}")
LOGGER.debug(f"Fraction of SR: {orient_df.iloc[0, 2]}")
LOGGER.debug(f"Orientation: {orient_df.iloc[0, 3]}")

self.write_orientation_to_json(orient_df, self.paths[0].name)

return orientation

def process_paired( # pylint: disable=R0912,R0915
Expand Down Expand Up @@ -293,13 +302,32 @@ def process_paired( # pylint: disable=R0912,R0915
orientation.file_1 = StatesOrientation.unstranded
orientation.file_2 = StatesOrientation.unstranded

# write log messages and return result
orient_df_1 = self.create_orient_df(
reads, fractions_all_states, orientation, paired=True, file_index=1
)
orient_df_2 = self.create_orient_df(
reads, fractions_all_states, orientation, paired=True, file_index=2
)

LOGGER.debug(
f"Required number of mapped read pairs: {self.min_mapped_reads}"
f"Required number of mapped reads: {self.min_mapped_reads}"
)
LOGGER.debug(f"Number of reads mapped: {reads}")
LOGGER.debug(f"Fraction of states: {fractions_all_states}")
LOGGER.debug(f"Orientation: {orientation}")
LOGGER.debug(f"Number of mapped reads: {orient_df_1.iloc[0, 0]}")
LOGGER.debug(f"Fraction of ISF: {orient_df_1.iloc[0, 1]}")
LOGGER.debug(f"Fraction of ISR: {orient_df_1.iloc[0, 2]}")
LOGGER.debug(f"Orientation file 1: {orient_df_1.iloc[0, 3]}")
LOGGER.debug(f"Orientation file 2: {orient_df_2.iloc[0, 3]}")
LOGGER.debug(
f"Orientation relationship: {orient_df_1.iloc[0, 4]}"
)

self.write_orientation_to_json(
orient_df_1, getattr(self.paths[0], 'name')
)
self.write_orientation_to_json(
orient_df_2, getattr(self.paths[1], 'name')
)

return orientation

@staticmethod
Expand Down Expand Up @@ -338,3 +366,77 @@ def sum_dicts(*dicts: Dict[Any, float]) -> Dict[Any, float]:
for key, num in dct.items():
result[key] += num
return dict(result)

@staticmethod
def create_orient_df(
reads,
fractions_all_states,
orientation,
paired: bool,
file_index=None
):
"""Prepare DataFrame for orientation details.
Constructs a DataFrame with information about read orientation for
single or paired-end sequencing data.
Args:
reads: Number of mapped reads.
fractions_all_states: Dictionary containing the fraction
of each orientation state.
orientation: Orientation states.
paired: Indicates if the sequencing data is paired-end.
file_index: Specifies the index of the file for paired-end data
(1 or 2). Ignored for single-end data.
Returns:
pd.DataFrame: A DataFrame containing orientation details.
"""
if paired:
data = {
'Number of mapped reads': reads,
'Fraction ISF': fractions_all_states.get(
StatesOrientationRelationship.inward_stranded_forward
),
'Fraction ISR': fractions_all_states.get(
StatesOrientationRelationship.inward_stranded_reverse
),
'Orientation': getattr(
orientation.file_1
if file_index == 1 else orientation.file_2,
'value',
None
),
'Relationship': getattr(
orientation.relationship, 'value', None
)
}
else:
data = {
'Number of mapped reads': reads,
'Fraction SF': fractions_all_states.get(
StatesOrientation.stranded_forward
),
'Fraction SR': fractions_all_states.get(
StatesOrientation.stranded_reverse
),
'Orientation': orientation.value
}
return pd.DataFrame([data])

def write_orientation_to_json(self, orient_df, filename):
"""Write orientation dataframe to a JSON file.
Serializes the provided orientation dataframe to a JSON file
with indentation.
Args:
orient_df: The dataframe containing orientation details.
filename: Name of the file to save the JSON data.
Returns:
None
"""
file_path = Path(self.out_dir) / f"read_orientation_{filename}.json"
LOGGER.debug(f"Writing results to file: {file_path}")
orient_df.to_json(file_path, orient='split', index=False, indent=True)

0 comments on commit a081c35

Please sign in to comment.