From a081c35a1fe9f439e60261c8f43e70399b800bd8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?=
 <51365402+balajtimate@users.noreply.github.com>
Date: Wed, 9 Oct 2024 08:59:08 +0200
Subject: [PATCH] feat: output read orientation fractions to json (#169)

* feat: add read orient single json output

* feat: add read orient single json output

* update single orientation json

* feat: add read orient paired json output

* refactor: simplify read orientation functions

* add pylint exception

* minor refactor get_read_orientation
---
 htsinfer/get_read_orientation.py | 122 ++++++++++++++++++++++++++++---
 1 file changed, 112 insertions(+), 10 deletions(-)

diff --git a/htsinfer/get_read_orientation.py b/htsinfer/get_read_orientation.py
index 4b36e8b..d5bac0f 100644
--- a/htsinfer/get_read_orientation.py
+++ b/htsinfer/get_read_orientation.py
@@ -6,6 +6,7 @@
 from typing import (Any, DefaultDict, Dict, List)
 
 import pysam  # type: ignore
+import pandas as pd  # type: ignore
 
 from htsinfer.exceptions import (
     FileProblem,
@@ -58,6 +59,7 @@ def __init__(
         self.library_source = config.results.library_source
         self.transcripts_file = config.args.t_file_processed
         self.tmp_dir = config.args.tmp_dir
+        self.out_dir = config.args.out_dir
         self.min_mapped_reads = config.args.read_orientation_min_mapped_reads
         self.min_fraction = config.args.read_orientation_min_fraction
         self.mapping = mapping
@@ -176,13 +178,20 @@ def process_single(
             else:
                 orientation = StatesOrientation.unstranded
 
-        # write log messages and return result
+        orient_df = self.create_orient_df(
+            reads, fractions_all_states, orientation, paired=False
+        )
+
         LOGGER.debug(
-            f"Required number of mapped reads pairs: {self.min_mapped_reads}"
+            f"Required number of mapped reads: {self.min_mapped_reads}"
         )
-        LOGGER.debug(f"Number of reads mapped: {reads}")
-        LOGGER.debug(f"Fraction of states: {fractions_all_states}")
-        LOGGER.debug(f"Orientation: {orientation}")
+        LOGGER.debug(f"Number of mapped reads: {orient_df.iloc[0, 0]}")
+        LOGGER.debug(f"Fraction of SF: {orient_df.iloc[0, 1]}")
+        LOGGER.debug(f"Fraction of SR: {orient_df.iloc[0, 2]}")
+        LOGGER.debug(f"Orientation: {orient_df.iloc[0, 3]}")
+
+        self.write_orientation_to_json(orient_df, self.paths[0].name)
+
         return orientation
 
     def process_paired(  # pylint: disable=R0912,R0915
@@ -293,13 +302,32 @@ def process_paired(  # pylint: disable=R0912,R0915
                 orientation.file_1 = StatesOrientation.unstranded
                 orientation.file_2 = StatesOrientation.unstranded
 
-        # write log messages and return result
+        orient_df_1 = self.create_orient_df(
+            reads, fractions_all_states, orientation, paired=True, file_index=1
+        )
+        orient_df_2 = self.create_orient_df(
+            reads, fractions_all_states, orientation, paired=True, file_index=2
+        )
+
         LOGGER.debug(
-            f"Required number of mapped read pairs: {self.min_mapped_reads}"
+            f"Required number of mapped reads: {self.min_mapped_reads}"
         )
-        LOGGER.debug(f"Number of reads mapped: {reads}")
-        LOGGER.debug(f"Fraction of states: {fractions_all_states}")
-        LOGGER.debug(f"Orientation: {orientation}")
+        LOGGER.debug(f"Number of mapped reads: {orient_df_1.iloc[0, 0]}")
+        LOGGER.debug(f"Fraction of ISF: {orient_df_1.iloc[0, 1]}")
+        LOGGER.debug(f"Fraction of ISR: {orient_df_1.iloc[0, 2]}")
+        LOGGER.debug(f"Orientation file 1: {orient_df_1.iloc[0, 3]}")
+        LOGGER.debug(f"Orientation file 2: {orient_df_2.iloc[0, 3]}")
+        LOGGER.debug(
+            f"Orientation relationship: {orient_df_1.iloc[0, 4]}"
+        )
+
+        self.write_orientation_to_json(
+            orient_df_1, getattr(self.paths[0], 'name')
+        )
+        self.write_orientation_to_json(
+            orient_df_2, getattr(self.paths[1], 'name')
+        )
+
         return orientation
 
     @staticmethod
@@ -338,3 +366,77 @@ def sum_dicts(*dicts: Dict[Any, float]) -> Dict[Any, float]:
             for key, num in dct.items():
                 result[key] += num
         return dict(result)
+
+    @staticmethod
+    def create_orient_df(
+            reads,
+            fractions_all_states,
+            orientation,
+            paired: bool,
+            file_index=None
+    ):
+        """Prepare DataFrame for orientation details.
+
+        Builds a one-row table with the mapped read count, the fractions of
+        the two stranded states, and the orientation value(s).
+
+        Args:
+            reads: Number of mapped reads.
+            fractions_all_states: Mapping from orientation state to its
+                fraction; states absent from it are looked up as None.
+            orientation: Orientation result (per-file results if paired).
+            paired: Indicates if the sequencing data is paired-end.
+            file_index: For paired-end data, 1 reports `orientation.file_1`;
+                any other value reports `file_2`. Unused for single-end data.
+
+        Returns:
+            pd.DataFrame: A single-row DataFrame of orientation details.
+        """
+        if paired:
+            data = {
+                'Number of mapped reads': reads,
+                'Fraction ISF': fractions_all_states.get(
+                    StatesOrientationRelationship.inward_stranded_forward
+                ),
+                'Fraction ISR': fractions_all_states.get(
+                    StatesOrientationRelationship.inward_stranded_reverse
+                ),
+                'Orientation': getattr(
+                    orientation.file_1
+                    if file_index == 1 else orientation.file_2,
+                    'value',
+                    None
+                ),
+                'Relationship': getattr(
+                    orientation.relationship, 'value', None
+                )
+            }
+        else:
+            data = {
+                'Number of mapped reads': reads,
+                'Fraction SF': fractions_all_states.get(
+                    StatesOrientation.stranded_forward
+                ),
+                'Fraction SR': fractions_all_states.get(
+                    StatesOrientation.stranded_reverse
+                ),
+                'Orientation': orientation.value
+            }
+        return pd.DataFrame([data])
+
+    def write_orientation_to_json(self, orient_df, filename):
+        """Write orientation dataframe to a JSON file.
+
+        Writes `read_orientation_<filename>.json` into `self.out_dir`, using
+        pandas' 'split' layout without the row index, indented.
+
+        Args:
+            orient_df: The dataframe containing orientation details.
+            filename: Name embedded in the output JSON file name.
+
+        Returns:
+            None
+        """
+        file_path = Path(self.out_dir) / f"read_orientation_{filename}.json"
+        LOGGER.debug(f"Writing results to file: {file_path}")
+        orient_df.to_json(file_path, orient='split', index=False, indent=True)