Skip to content

Commit

Permalink
Merge pull request #565 from npklein/master
Browse files Browse the repository at this point in the history
Make HtseqCount work
  • Loading branch information
freerkvandijk authored Jul 1, 2016
2 parents 7e12a85 + a9c9251 commit 2dfa24c
Show file tree
Hide file tree
Showing 12 changed files with 171 additions and 107 deletions.
23 changes: 23 additions & 0 deletions compute5/ASE/chromosomes_noSex.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
CHR
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
step,protocol,dependencies
GenotypeGvcfs_individual,protocols/GatkGenotypeGvcfs_individual.sh,HaplotypeCallerGvcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Convert a "key,value"-per-line parameters file into a two-row CSV:
#   row 1: every key, comma-separated (column headers)
#   row 2: every value, comma-separated, in the same order
#
# Usage: <this script> <parameters-file> <output-csv>
#
# Lines whose first character is '#' are comments and are dropped. A '#'
# further into a line (e.g. "defaultInterpreter,#!/bin/bash") is kept as data.
# Blank lines are skipped so they cannot create empty, unnamed columns.
#
# Only the first two comma-separated fields of a line are used; anything after
# a second comma is ignored.
# NOTE(review): values that themselves contain commas are therefore truncated.

#######################################
# Arguments: $1 - parameters file to read
#            $2 - CSV file to (over)write; may be the same path as $1
# Outputs:   writes the two-row CSV to $2, diagnostics to stderr
# Returns:   0 on success, 2 on usage error, 1 on any other failure
#######################################
convert_parameters() (
  # The body runs in a subshell: variables stay private without relying on
  # the non-POSIX 'local', and the EXIT trap below fires when the function
  # ends rather than when the whole script ends.
  if [ "$#" -ne 2 ]; then
    printf 'Usage: %s <parameters-file> <output-csv>\n' "$0" >&2
    exit 2
  fi

  src=$1
  dest=$2

  if [ ! -r "$src" ]; then
    printf 'Cannot read parameters file: %s\n' "$src" >&2
    exit 1
  fi

  # Stage the result in a private temp file instead of "$1.tmp": nothing next
  # to the input gets clobbered, the input directory need not be writable,
  # and the output is only touched after the conversion has succeeded.
  scratch=$(mktemp) || exit 1
  trap 'rm -f -- "$scratch"' EXIT

  # Input comes in via redirection so that a file name containing '=' or a
  # leading '-' is never misread by awk as an assignment or an option.
  awk -F',' '
    /^#/       { next }   # comment line
    /^[ \t]*$/ { next }   # blank line
    {
      keys = keys sep $1
      vals = vals sep $2
      sep = ","
      n++
    }
    END {
      # No entries means an empty output file, not two empty rows.
      if (n) {
        print keys
        print vals
      }
    }
  ' < "$src" > "$scratch" || exit 1

  # Copy rather than move, so an existing output file keeps its permissions
  # and a symlinked output path stays a symlink.
  cat -- "$scratch" > "$dest"
)

# Last command of the script: its status becomes the script's exit status.
convert_parameters "$@"
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
########################################################################
## General parameters for SLURM settings and cluster directory paths
#
queue,ll
defaultInterpreter,#!/bin/bash
stage,module load
checkStage,module list
WORKDIR,/groups/
root,${WORKDIR}
group,umcg-wijmenga
tmp,tmp04
resDir,/groups/umcg-wijmenga/tmp04/resources/
toolDir,/apps/software/
projectDir,${root}/${group}/${tmp}/projects/umcg-ndeklein/${project}/results/
uniqueID,${sampleName}_${internalId}
platform,ILLUMINA
########################################################################
## Software and data versions/builds
#
kallistoVersion,0.42.2.1-goolf-1.7.20
samtoolsVersion,1.3-foss-2015b
htseqVersion,0.6.1p1
########################################################################
## Specific tools paths
#
## Kallisto
kallistoIndex,/groups/umcg-pub/tmp04/public-rna-seq/kallisto/hg19.v75.cdna.all.42.2.idx
kallistoDir,${projectDir}/kallisto/
fragmentLength,200
## HtSeq
stranded,reverse
annotationGtf,/apps/data/ftp.ensembl.org/pub/release-75/gtf/homo_sapiens/Homo_sapiens.GRCh37.75.gtf
htseqDir,${projectDir}/htSeq/
htseqTxtOutput,${htseqDir}/${uniqueID}.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
queue,defaultInterpreter,stage,checkStage,WORKDIR,root,group,tmp,resDir,toolDir,projectDir,uniqueID,platform,kallistoVersion,samtoolsVersion,htseqVersion,kallistoIndex,kallistoDir,fragmentLength,stranded,annotationGtf,htseqDir,htseqTxtOutput
ll,#!/bin/bash,module load,module list,/groups/,${WORKDIR},umcg-wijmenga,tmp04,/groups/umcg-wijmenga/tmp04/resources/,/apps/software/,${root}/${group}/${tmp}/projects/umcg-ndeklein/${project}/results/,${sampleName}_${internalId},ILLUMINA,0.42.2.1-goolf-1.7.20,1.3-foss-2015b,0.6.1p1,/groups/umcg-pub/tmp04/public-rna-seq/kallisto/hg19.v75.cdna.all.42.2.idx,${projectDir}/kallisto/,200,yes,/apps/data/ftp.ensembl.org/pub/release-75/gtf/homo_sapiens/Homo_sapiens.GRCh37.75.gtf,${projectDir}/htSeq/,${htseqDir}/${uniqueID}.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,24 @@ group,umcg-wijmenga
tmp,tmp04
resDir,/groups/umcg-wijmenga/tmp04/resources/
toolDir,/apps/software/
projectDir,${root}/${group}/${tmp}/projects/${project}/
projectDir,${root}/${group}/${tmp}/projects/umcg-ndeklein/${project}/results/
uniqueID,${sampleName}_${internalId}
platform,ILLUMINA
########################################################################
## Software and data versions/builds
#
kallistoVersion,0.42.2.1-goolf-1.7.20
samtoolsVersion,1.3-foss-2015b
htseqVersion,0.6.1p1
########################################################################
## Specific tools paths
#
## Kallisto
kallistoIndex,/groups/umcg-pub/tmp04/public-rna-seq/kallisto/hg19.v75.cdna.all.42.2.idx
kallistoDir,${projectDir}/kallisto/
fragmentLength,200
fragmentLength,200
## HtSeq
stranded,yes
annotationGtf,/apps/data/ftp.ensembl.org/pub/release-75/gtf/homo_sapiens/Homo_sapiens.GRCh37.75.gtf
htseqDir,${projectDir}/htSeq/
htseqTxtOutput,${htseqDir}/${uniqueID}.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
queue,defaultInterpreter,stage,checkStage,WORKDIR,root,group,tmp,resDir,toolDir,projectDir,uniqueID,platform,kallistoVersion,samtoolsVersion,htseqVersion,kallistoIndex,kallistoDir,fragmentLength,stranded,annotationGtf,htseqDir,htseqTxtOutput
ll,#!/bin/bash,module load,module list,/groups/,${WORKDIR},umcg-wijmenga,tmp04,/groups/umcg-wijmenga/tmp04/resources/,/apps/software/,${root}/${group}/${tmp}/projects/${project}/results/,${sampleName}_${internalId},ILLUMINA,0.42.2.1-goolf-1.7.20,1.3-foss-2015b,0.6.1p1,/groups/umcg-pub/tmp04/public-rna-seq/kallisto/hg19.v75.cdna.all.42.2.idx,${projectDir}/kallisto/,200,reverse,/apps/data/ftp.ensembl.org/pub/release-75/gtf/homo_sapiens/Homo_sapiens.GRCh37.75.gtf,${projectDir}/htSeq/,${htseqDir}/${uniqueID}.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
########################################################################
## General parameters for SLURM settings and cluster directory paths
#
queue,ll
defaultInterpreter,#!/bin/bash
stage,module load
checkStage,module list
WORKDIR,/groups/
root,${WORKDIR}
group,umcg-wijmenga
tmp,tmp04
resDir,/groups/umcg-wijmenga/tmp04/resources/
toolDir,/apps/software/
projectDir,${root}/${group}/${tmp}/projects/umcg-ndeklein/${project}/results/
uniqueID,${sampleName}_${internalId}
platform,ILLUMINA
########################################################################
## Software and data versions/builds
#
kallistoVersion,0.42.2.1-goolf-1.7.20
samtoolsVersion,1.3-foss-2015b
htseqVersion,0.6.1p1
########################################################################
## Specific tools paths
#
## Kallisto
kallistoIndex,/groups/umcg-pub/tmp04/public-rna-seq/kallisto/hg19.v75.cdna.all.42.2.idx
kallistoDir,${projectDir}/kallisto/
fragmentLength,200
## HtSeq
stranded,reverse
annotationGtf,/apps/data/ftp.ensembl.org/pub/release-75/gtf/homo_sapiens/Homo_sapiens.GRCh37.75.gtf
htseqDir,${projectDir}/htSeq/
htseqTxtOutput,${htseqDir}/${uniqueID}.txt

This file was deleted.

63 changes: 0 additions & 63 deletions compute5/Public_RNA-seq_quantification/protocols/HTSeq_count.sh

This file was deleted.

93 changes: 56 additions & 37 deletions compute5/Public_RNA-seq_quantification/protocols/HtseqCount.sh
Original file line number Diff line number Diff line change
@@ -1,42 +1,61 @@
#MOLGENIS nodes=1 ppn=1 mem=6gb walltime=23:59:00

#Parameter mapping #why not string foo,bar? instead of string foo\nstring bar
#string stage
#string checkStage
#string WORKDIR
#string projectDir

#string markDuplicatesBam
#string markDuplicatesBai
#string genomeEnsembleAnnotationFile
#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=6gb

#Parameter mapping
#string bam
#string annotationGtf
#string htseqTxtOutput
#string samtoolsVersion
#string htseqVersion
#string htseqCountDir
#string htseqCountCounts

echo "## "$(date)" ## $0 Started "


getFile ${markDuplicatesBam}
getFile ${markDuplicatesBai}

${stage} HTSeq/${htseqVersion}
${stage} SAMtools/${samtoolsVersion}
${checkStage}

set -x
set -e

mkdir -p ${htseqCountDir}

samtools view -h ${markDuplicatesBam} | $EBROOTHTSEQ/scripts/htseq-count -m union -s no -t exon -i gene_id - ${genomeEnsembleAnnotationFile} > ${htseqCountCounts}

putFile ${htseqCountCounts}

if [ ! -z "$PBS_JOBID" ]; then
echo "## "$(date)" Collecting PBS job statistics"
qstat -f $PBS_JOBID
#string stranded
#string htseqDir

#Echo parameter values
bam="${bam}"
annotationGtf="${annotationGtf}"
htseqTxtOutput="${htseqTxtOutput}"

echo -e "bam=${bam}\nannotationGtf=${annotationGtf}\nhtseqTxtOutput=${htseqTxtOutput}"

module load SAMtools/${samtoolsVersion}
module load HTSeq/${htseqVersion}
module list

# Step 1: write a name-sorted copy of the input BAM (samtools sort -n) to
# ${TMPDIR}/nameSorted.bam, which the quantification step reads afterwards.
# All path expansions are quoted so that directories or file names containing
# spaces or glob characters are passed to the tools as single arguments.
echo "Sorting bam file by name"

# Create the output directory first and stop right away if that fails, rather
# than running the whole sort and only failing once the counts are written.
if ! mkdir -p "${htseqDir}"; then
  echo "Failed to create output directory ${htseqDir}"
  exit 1
fi

if samtools sort \
  -n \
  -o "${TMPDIR}/nameSorted.bam" \
  "${bam}"
then
  echo "bam file sorted"
else
  echo "Failed to sort bam file"
  # Do not leave a partial sorted BAM behind in the scratch directory.
  rm -f "${TMPDIR}/nameSorted.bam"
  exit 1
fi
ls ${TMPDIR}
# Step 2: stream the name-sorted BAM as SAM into htseq-count and store the
# per-gene counts in ${htseqTxtOutput}.
echo -e "\nQuantifying expression"

# Without pipefail the 'if' below would only test the exit status of the last
# pipeline stage (head). A crash in 'samtools view' or 'htseq-count' would
# then be reported as success and a truncated count table moved into place.
set -o pipefail

# 'head -n -5' drops the last five lines of the htseq-count output.
# NOTE(review): presumably these are the special "__*" summary counters that
# htseq-count appends after the gene rows - confirm for the HTSeq version used.
#
# The result is written to a "___tmp___" file and only renamed on success, so
# ${htseqTxtOutput} never holds a partial table.
if samtools view -h "${TMPDIR}/nameSorted.bam" \
  | htseq-count \
      -m union \
      --stranded "${stranded}" \
      - \
      "${annotationGtf}" \
  | head -n -5 \
  > "${htseqTxtOutput}___tmp___"
then
  echo "Gene count succesfull"
  mv "${htseqTxtOutput}___tmp___" "${htseqTxtOutput}"
else
  echo "Genecount failed"
  # Clean up both the partial count table and the sorted scratch BAM.
  rm -f "${htseqTxtOutput}___tmp___"
  rm -f "${TMPDIR}/nameSorted.bam"
  exit 1
fi

echo "## "$(date)" ## $0 Done "
rm ${TMPDIR}/nameSorted.bam

echo "Finished!"
6 changes: 3 additions & 3 deletions compute5/Public_RNA-seq_quantification/samplesheet1.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
internalId,project,sampleName,reads1FqGz,reads2FqGz
ID_01,sample_project,sample01,sample1_reads1.fq.gz,sample1_reads2.fq.gz
ID_02,sample_project,sample02,sample2_reads2.fq.gz,sample2_reads2.fq.gz
internalId,project,sampleName,reads1FqGz,reads2FqGz,bam
ID_01,sample_project,sample01,sample1_reads1.fq.gz,sample1_reads2.fq.gz,sample1_aligned_sorted.bam
ID_02,sample_project,sample02,sample2_reads1.fq.gz,sample2_reads2.fq.gz,sample2_aligned_sorted.bam

0 comments on commit 2dfa24c

Please sign in to comment.