From 438df70c9a295f08242137166d92e7789d24cc96 Mon Sep 17 00:00:00 2001 From: michaelpanciera Date: Wed, 6 Apr 2016 12:52:49 -0400 Subject: [PATCH 1/2] host_map counts unmapped only if both mates unmapped --- pathdiscov/host_map/bwa_filter_host.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pathdiscov/host_map/bwa_filter_host.sh b/pathdiscov/host_map/bwa_filter_host.sh index c40ba25c..c00ceecb 100755 --- a/pathdiscov/host_map/bwa_filter_host.sh +++ b/pathdiscov/host_map/bwa_filter_host.sh @@ -11,7 +11,7 @@ db=$5 # database paired=$6 # paired - 1 if paired data, 0 if not wellpaired=$7 # wellpaired - 1 if paired data "well-paired" (i.e., mate files have exact same IDs), 0 if not opts=$8 # options - +droppair=1 if ! cd ${outputdir}; then echo "[error] changing directories"; @@ -46,10 +46,17 @@ else echo "[stats] paired stats" samtools flagstat paired.bam + + if [ "$droppair" == 1 ]; then + samtools view paired.bam -f 12 | cut -f1 | sort -u | awk '{print "@"$0}' > paired.unmap.id + else + #could maybe use samtools view paired.bam -f 4 . . . instead + cat paired.sam | awk '$3=="*"' | cut -f1 | sort -u | awk '{print "@"$0}' > paired.unmap.id + fi + - # get unmapped read IDs - cat paired.sam | awk '$3=="*"' | cut -f1 | sort -u | awk '{print "@"$0}' > paired.unmap.id rm paired.sam + # get unmapped read IDs # extract the 4 line chunks from a fastq file, given in argument 1 file, for IDs give in argument 2 file cmd="fastq_extract_id.pl ${r1} paired.unmap.id > R1.unmap.fastq" From 48d4eb3a67d95bde99e5d761c15e7ee36c2e5666 Mon Sep 17 00:00:00 2001 From: Mike Panciera Date: Thu, 7 Apr 2016 10:27:38 -0400 Subject: [PATCH 2/2] add comments --- pathdiscov/host_map/bwa_filter_host.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pathdiscov/host_map/bwa_filter_host.sh b/pathdiscov/host_map/bwa_filter_host.sh index c00ceecb..c5a1765e 100755 --- a/pathdiscov/host_map/bwa_filter_host.sh +++ b/pathdiscov/host_map/bwa_filter_host.sh @@ -48,8 +48,11 @@ else samtools flagstat paired.bam if [ "$droppair" == 1 ]; then + + # Extracts reads where both themselves are unmapped and their mate is unmapped samtools view paired.bam -f 12 | cut -f1 | sort -u | awk '{print "@"$0}' > paired.unmap.id else + # Extracts reads which are themselves unmapped (mate may or may not be mapped) #could maybe use samtools view paired.bam -f 4 . . . instead cat paired.sam | awk '$3=="*"' | cut -f1 | sort -u | awk '{print "@"$0}' > paired.unmap.id fi