Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: infer library type relationship #157

Merged
merged 15 commits into from
Jan 21, 2024
Merged
79 changes: 44 additions & 35 deletions htsinfer/get_library_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def _evaluate_mate_relationship(
ids_2: As `ids_1` but for the putative second mate file.
"""
self.results.relationship = StatesTypeRelationship.not_mates
if ids_1 == ids_2 and len(ids_1) > 0 and len(ids_2) > 0:
if ids_1 and ids_2 and ids_1 == ids_2:
if (
self.results.file_1 == StatesType.first_mate and
self.results.file_2 == StatesType.second_mate
Expand All @@ -140,8 +140,7 @@ def _evaluate_mate_relationship(
else:
self.results.relationship = StatesTypeRelationship.not_available
LOGGER.debug(
"Library source is not determined, "
"mate relationship cannot be inferred by alignment."
"Mate relationship cannot be determined."
)

def _align_mates(self):
Expand All @@ -159,57 +158,66 @@ def _align_mates(self):
previous_seq_id2 = None

reads1 = [] # List to store alignments for one read from file1
mate1 = [] # List to store alignments for each read
mate1 = [] # List to store alignments for each read from file1
reads2 = [] # List to store alignments for one read from file2
mate2 = [] # List to store alignments for each read from file2

concordant = 0

for read1 in samfile1:
if not read1.flag & (1 << 2) and isinstance(read1.query_name, str):
seq_id1 = read1.query_name
if (
seq_id1 != previous_seq_id1 and
previous_seq_id1 is not None
):
mate1.append(reads1.copy())
reads1.clear()
if read1.reference_end:
reads1.append(read1)
seq_id1 = read1.query_name
if (
seq_id1 != previous_seq_id1 and
previous_seq_id1 is not None
):
mate1.append(reads1.copy())
balajtimate marked this conversation as resolved.
Show resolved Hide resolved
reads1.clear()
if read1.reference_end:
reads1.append(read1)
previous_seq_id1 = seq_id1
mate1.append(reads1.copy())

read_counter = 0
for read2 in samfile2:
if not read2.flag & (1 << 2) and isinstance(read2.query_name, str):
seq_id2 = read2.query_name
if seq_id2 != previous_seq_id2 \
and previous_seq_id2 is not None:
if read_counter < len(mate1) and self._compare_alignments(
mate1[read_counter], reads2
):
concordant += 1
reads2.clear()
read_counter += 1
if read2.reference_end:
reads2.append(read2)
seq_id2 = read2.query_name
if (
seq_id2 != previous_seq_id2 and
previous_seq_id2 is not None
):
mate2.append(reads2.copy())
if self._compare_alignments(
mate1[read_counter], reads2
):
concordant += 1
reads2.clear()
read_counter += 1
if read2.reference_end:
reads2.append(read2)
previous_seq_id2 = seq_id2

if read_counter < len(mate1) and self._compare_alignments(
mate2.append(reads2.copy())
if self._compare_alignments(
mate1[read_counter], reads2
):
concordant += 1
LOGGER.debug(f"Number of aligned reads file 1: {len(mate1)}")
LOGGER.debug(f"Number of aligned reads file 2: {read_counter}")

aligned_mate1 = len(list(filter(None, mate1)))
aligned_mate2 = len(list(filter(None, mate2)))

LOGGER.debug(f"Number of aligned reads file 1: {aligned_mate1}")
LOGGER.debug(f"Number of aligned reads file 2: {aligned_mate2}")
LOGGER.debug(f"Number of concordant reads: {concordant}")
self._update_relationship(concordant, read_counter)

self._update_relationship(
concordant, min(aligned_mate1, aligned_mate2)
)

samfile1.close()
samfile2.close()

def _update_relationship(self, concordant, read_counter):
def _update_relationship(self, concordant, aligned_reads):
"""Helper function to update relationship based on alignment."""
try:
ratio = concordant / read_counter
ratio = concordant / aligned_reads
except ZeroDivisionError:
self.results.relationship = (
StatesTypeRelationship.not_available
Expand All @@ -219,8 +227,9 @@ def _update_relationship(self, concordant, read_counter):
self.results.relationship = (
StatesTypeRelationship.split_mates
)
self.mapping.library_type.relationship \
= StatesTypeRelationship.split_mates
self.mapping.library_type.relationship = (
StatesTypeRelationship.split_mates
)
self.mapping.mapped = False
self.mapping.star_dirs = []
else:
Expand Down
3 changes: 1 addition & 2 deletions htsinfer/get_read_orientation.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,7 @@ def evaluate(self) -> ResultsOrientation:
self.mapping.evaluate()
else:
LOGGER.debug(
"Library source is not determined, "
"read orientation cannot be inferred by alignment."
"Read orientation cannot be determined."
)

return self.process_alignments(star_dirs=self.mapping.star_dirs)
Expand Down
1 change: 1 addition & 0 deletions htsinfer/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ def build_star_command(
"--runThreadN", f"{str(self.threads_star)}",
"--genomeDir", f"{str(index_dir)}",
"--outFilterMultimapNmax", "50",
"--outSAMorder", "PairedKeepInputOrder",
balajtimate marked this conversation as resolved.
Show resolved Hide resolved
"--outSAMunmapped", "Within", "KeepPairs",
]
cmd: List[str] = cmd_base[:]
Expand Down
3 changes: 2 additions & 1 deletion tests/test_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,8 @@ def test_prepare_star_alignment_commands(self, tmpdir):
file1_alignment_path = tmpdir / 'alignments/file_1'
cmd = "STAR --alignIntronMax 1 --alignEndsType Local --runThreadN 1" \
+ " --genomeDir " + str(index_dir) + " --outFilterMultimapNmax " \
+ "50 --outSAMunmapped Within KeepPairs --readFilesIn " \
+ "50 --outSAMorder PairedKeepInputOrder " \
+ "--outSAMunmapped Within KeepPairs --readFilesIn " \
+ str(FILE_2000_RECORDS) + " --outFileNamePrefix " \
+ str(file1_alignment_path) + "/"
results = test_instance.prepare_star_alignment_commands(
Expand Down
Loading