Skip to content

Commit

Permalink
feat: add assumed mates
Browse files Browse the repository at this point in the history
  • Loading branch information
balajtimate committed Jan 23, 2024
1 parent 7b65c43 commit 9220b6d
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 4 deletions.
33 changes: 30 additions & 3 deletions htsinfer/get_library_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,21 @@ def _evaluate_mate_relationship(
self.mapping.library_type.relationship = (
StatesTypeRelationship.split_mates
)
# Infer mate relationship, even when assumed to be single
elif (
self.results.file_1 == StatesType.single and
self.results.file_2 == StatesType.single
) and (
self.library_source.file_1.short_name is not None or
self.library_source.file_2.short_name is not None
):
LOGGER.debug("Determining mate relationship by alignment...")
self.mapping.library_type.relationship \
= StatesTypeRelationship.not_available
self.mapping.library_source = self.library_source
self.mapping.paths = self.path_1, self.path_2
self.mapping.evaluate()
self._align_mates()
elif (
self.library_source.file_1.short_name is not None or
self.library_source.file_2.short_name is not None
Expand Down Expand Up @@ -209,15 +224,15 @@ def _align_mates(self):
LOGGER.debug(f"Number of aligned reads file 2: {aligned_mate2}")
LOGGER.debug(f"Number of concordant reads: {concordant}")

self._update_relationship(
self._update_relationship_type(
concordant, min(aligned_mate1, aligned_mate2)
)

samfile1.close()
samfile2.close()

def _update_relationship(self, concordant, aligned_reads):
"""Helper function to update relationship based on alignment."""
def _update_relationship_type(self, concordant, aligned_reads):
"""Helper function to update relationship and type."""
try:
ratio = concordant / aligned_reads
except ZeroDivisionError:
Expand All @@ -238,6 +253,18 @@ def _update_relationship(self, concordant, aligned_reads):
self.results.relationship = (
StatesTypeRelationship.not_mates
)
if self.results.relationship == (
StatesTypeRelationship.split_mates
) and (
self.results.file_1 == StatesType.single and
self.results.file_2 == StatesType.single
) or (
self.results.file_1 == StatesType.not_available and
self.results.file_2 == StatesType.not_available
):
# Update the types of the first and second file to assumed mates
self.results.file_1 = StatesType.first_mate_assumed
self.results.file_2 = StatesType.second_mate_assumed

class AlignedSegment:
"""Placeholder class for mypy "Missing attribute"
Expand Down
4 changes: 3 additions & 1 deletion htsinfer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,11 @@ class StatesType(Enum):
that the library represents a single-end library.
"""
first_mate = "first_mate"
first_mate_assumed = "first_mate_assumed"
mixed_mates = "mixed_mates"
not_available = None
second_mate = "second_mate"
second_mate_assumed = "second_mate_assumed"
single = "single"


Expand Down Expand Up @@ -438,7 +440,7 @@ class Args(BaseModel):
lib_source_min_match_pct: float = 2
lib_source_min_freq_ratio: float = 2
lib_type_max_distance: int = 1000
lib_type_mates_cutoff: float = 0.95
lib_type_mates_cutoff: float = 0.85
read_orientation_min_mapped_reads: int = 20
read_orientation_min_fraction: float = 0.75
path_1_processed: Path = Path()
Expand Down

0 comments on commit 9220b6d

Please sign in to comment.