Skip to content

Commit

Permalink
Merge pull request #586 from npklein/master
Browse files Browse the repository at this point in the history
GCCXX error fix, use gzipped vcf (phasing pipeline)
  • Loading branch information
freerkvandijk authored Oct 11, 2016
2 parents b8413b3 + adf14f5 commit a79238e
Show file tree
Hide file tree
Showing 5 changed files with 248 additions and 0 deletions.
67 changes: 67 additions & 0 deletions compute5/Public_RNA-seq_quantification/protocols/MergeFastq.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#MOLGENIS nodes=1 ppn=1 mem=10gb walltime=01:00:00

### variables to help adding to database (have to use weave)
#string sampleName
#string project
###
#list reads1FqGz,reads2FqGz


#######################################
# Check whether an array contains a given value.
# Arguments:
#   $1 - NAME of the array to search (not its expansion)
#   $2 - value to look for (exact string comparison)
# Returns:
#   0 if the value is present, 1 otherwise
#######################################
array_contains () {
  # Prefixed local names keep the indirect expansion from being shadowed by a
  # caller's array that happens to share a name with one of these locals.
  local _ac_ref="$1[@]"
  local _ac_needle=$2
  # Declared local so the loop no longer overwrites a caller's variable.
  local _ac_item
  # The trailing '-' default keeps the expansion valid under 'set -u' when
  # the referenced array is empty or unset.
  for _ac_item in "${!_ac_ref-}"; do
    if [[ "$_ac_item" == "$_ac_needle" ]]; then
      return 0
    fi
  done
  return 1
}


# Merge all FastQ files of one sample into a single R1 and a single R2 file.
# Reads: project, sampleName, reads1FqGz[], reads2FqGz[] (declared in the
#        header of this protocol).
# Uses:  array_contains (defined earlier in this file) and getFile / putFile /
#        module, which are not defined in this file.
echo "## $(date) Start $0"
echo "ID (project-sampleName): ${project}-${sampleName}"
#check modules
module list
for file in "${reads1FqGz[@]}" "${reads2FqGz[@]}"; do
  echo "getFile file='$file'"
  getFile "$file"
done

#Create string with input fastq files to merge
#This check needs to be performed because Compute generates duplicate values in array
INPUTFQ1=()
INPUTFQ2=()
echo "merging"
for fq in "${reads1FqGz[@]}"; do
  echo "$fq"
  array_contains INPUTFQ1 "$fq" || INPUTFQ1+=("$fq") # If fqFile does not exist in array add it
done
echo "done"
for fq in "${reads2FqGz[@]}"; do
  echo "$fq"
  # De-duplicate against the R2 list itself; checking INPUTFQ1 here never
  # matched, so duplicated R2 entries were concatenated more than once.
  array_contains INPUTFQ2 "$fq" || INPUTFQ2+=("$fq") # If fqFile does not exist in array add it
done

# Output goes next to the first R1 input. The previous 'dirname reads1FqGz[@]'
# passed a literal string to dirname and therefore always produced '.'.
outputDir=$(dirname "${reads1FqGz[0]}")
mergedR1="${outputDir}/${sampleName}_R1.fq.gz"
mergedR2="${outputDir}/${sampleName}_R2.fq.gz"

echo "writing to ${mergedR1} and ${mergedR2}"
# "${arr[@]}" keeps every path a separate, unsplit argument to cat.
if cat "${INPUTFQ1[@]}" > "${mergedR1}" && cat "${INPUTFQ2[@]}" > "${mergedR2}"
then
  echo "returncode: $?"
  # Hand over both merged files; the R2 file was previously never uploaded.
  putFile "${mergedR1}"
  putFile "${mergedR2}"
  echo "succes moving files";
else
  echo "returncode: $?";
  echo "fail";
fi
echo "## $(date) ## $0 Done "
3 changes: 3 additions & 0 deletions compute5/Public_RNA-seq_quantification/workflowKallisto.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
step,protocol,dependencies
MergeFastq,protocols/MergeFastq.sh,
Kallisto,protocols/Kallisto.sh,MergeFastq
63 changes: 63 additions & 0 deletions molgenis-pipelines/compute5/BIOS_phasing/parameters.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
########################################################################
## General parameters for SLURM settings and cluster directory paths
#
queue,ll
defaultInterpreter,#!/bin/bash
stage,module load
checkStage,module list
WORKDIR,/groups/
root,${WORKDIR}
group,umcg-bios
tmp,tmp04
resDir,/groups/umcg-wijmenga/tmp04/resources/
toolDir,/apps/software/
projectDir,${root}/${group}/${tmp}/projects/${project}/
fvdProjectDir,${root}/${group}/${tmp}/projects/umcg-fvandijk/projects/
########################################################################
## Software and data versions/builds/paths
#
beagleVersion,09Feb16.2b7-Java-1.8.0_45
beagleJarVersion,09Feb16.2b7
shapeitVersion,v2.r837-static
GCCVersion,4.9.3-binutils-2.25
intervaltreeVersion,2.1.0-foss-2015b-Python-2.7.9
pyvcfVersion,0.6.7-foss-2015b-Python-2.7.9
samtoolsVersion,1.2-foss-2015b
bedtoolsVersion,2.23.0-foss-2015b
pythonVersion,3.4.1-foss-2015b
biopythonVersion,1.65-foss-2015b-Python-3.4.1
ngsutilsVersion,16.06.1
zlibVersion,1.2.8
bzip2Version,1.0.6-foss-2015b
GLibVersion,2.45.2-foss-2015b
vcftoolsVersion,0.1.12b-goolf-1.7.20-Perl-5.20.2-bare
RVersion,3.2.1-foss-2015b
phaserVersion,f085550
tabixVersion,0.2.6-goolf-1.7.20
referenceFastaName,human_g1k_v37
genomeBuild,b37
onekgGenomeFasta,${resDir}/${genomeBuild}/indices/${referenceFastaName}.fasta
geneticMapDir,/apps/data/www.shapeit.fr/genetic_map_b37/
geneticMapChr,${geneticMapDir}/genetic_map_chr${chromosome}_combined_b37.txt
OneKgPhase3VCF,/apps/data/1000G/release/20130502//ALL.wgs.phase3_shapeit2_mvncall_integrated_v5b.20130502.sites.vcf.gz
########################################################################
## Specific tools paths
#
## Input for Beagle from GATK GenotypeGVCFs
genotypedChrVcfGLDir,${projectDir}/genotypeVcfGL/
genotypedChrVcfGL,${genotypedChrVcfGLDir}/${project}.chr${chromosome}.genotypeGVCF.gg.vcf.gz
genotypedChrVcfTbi,${genotypedChrVcfGL}.tbi
## Beagle
beagleDir,${projectDir}/beagle/
genotypedChrVcfBeagleGenotypeProbabilities,${beagleDir}/${project}.chr${chromosome}.beagle.genotype.probs.gg
genotypedChrVcfShapeitInputPrefix,${beagleDir}/${project}.chr${chromosome}.beagle.genotype.probs.gg
## Shapeit
shapeitDir,${projectDir}/shapeit/
phasedScaffoldDir,/groups/umcg-lld/tmp04/projects/genotypingRelease3/selectionLldeep/lldeepPhased/
shapeitPhasedOutputPrefix,${shapeitDir}/${project}.chr${chromosome}.shapeit.phased
## phASER
mapq,0
baseq,0
phaserDir,${projectDir}/phASER
## genotype concordance
comparisonFileDir,${fvdProjectDir}RNA-seq_rare_variants/comparison_files/
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#MOLGENIS walltime=23:59:00 mem=8gb nodes=1 ppn=2

### variables to help adding to database (have to use weave)
#string project
###
#string stage
#string checkStage

#string WORKDIR
#string projectDir
#string beagleDir
#string genotypedChrVcfGL
#string genotypedChrVcfBeagleGenotypeProbabilities
#string genotypedChrVcfShapeitInputPrefix
#string GLibVersion
#string ngsutilsVersion
#string zlibVersion
#string bzip2Version
#string GCCVersion

# Runs prepareGenFromBeagle4 on the genotype-likelihood VCF and the Beagle
# posterior VCF, writing .gen.gz/.gen.sample/.hap.gz/.hap.sample files under
# ${genotypedChrVcfShapeitInputPrefix}, then stores an md5 checksum next to
# each output. getFile / putFile are not defined in this file.
#
# NOTE: the parameter is spelled GCCVersion (capital V) to match the key in
# parameters.csv; the earlier spelling 'GCCversion' had no matching key.

echo "## $(date) Start $0"

getFile "${genotypedChrVcfGL}"
getFile "${genotypedChrVcfBeagleGenotypeProbabilities}.vcf.gz"

# ${stage} and ${checkStage} hold multi-word commands ("module load" /
# "module list" in parameters.csv), so they must stay unquoted to word-split.
${stage} ngs-utils/${ngsutilsVersion}
${stage} GLib/${GLibVersion}
${stage} zlib/${zlibVersion}
${stage} bzip2/${bzip2Version}
# THIS NEEDS TO BE LOADED AFTER NGS-UTILS TO PREVENT GCCXX ERROR
${stage} GCC/${GCCVersion}
${checkStage}

# Suffixes of the files written by prepareGenFromBeagle4 for the given prefix.
outputSuffixes=(gen.gz gen.sample hap.gz hap.sample)

#Run conversion script beagle vcf to shapeit format
if "${EBROOTNGSMINUTILS}/prepareGenFromBeagle4_modified20160601/bin/prepareGenFromBeagle4" \
  --likelihoods "${genotypedChrVcfGL}" \
  --posteriors "${genotypedChrVcfBeagleGenotypeProbabilities}.vcf.gz" \
  --threshold 0.995 \
  --output "${genotypedChrVcfShapeitInputPrefix}"
then
  echo "returncode: $?";
  for suffix in "${outputSuffixes[@]}"; do
    putFile "${genotypedChrVcfShapeitInputPrefix}.${suffix}"
  done
  # Checksums are written from inside ${beagleDir} so the .md5 files contain
  # bare file names. The subshell confines the directory change, and a failed
  # cd no longer lets md5sum run in the wrong directory.
  (
    cd "${beagleDir}" || { echo "cannot cd to ${beagleDir}" >&2; exit 1; }
    for suffix in "${outputSuffixes[@]}"; do
      bname=$(basename "${genotypedChrVcfShapeitInputPrefix}.${suffix}")
      md5sum "${bname}" > "${bname}.md5"
    done
  )
  echo "succes moving files";
else
  echo "returncode: $?";
  echo "fail";
fi

echo "## $(date) ## $0 Done "

Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#MOLGENIS walltime=23:59:00 mem=8gb nodes=1 ppn=2

### variables to help adding to database (have to use weave)
#string project
###
#string stage
#string checkStage

#string WORKDIR
#string projectDir
#string genotypedChrVcfGLDir
#string genotypedChrVcfGL
#string vcf
#string biopythonVersion
#string ngsutilsVersion

# Runs PL_to_GL_reorder.py on ${vcf}, writing the result to
# ${genotypedChrVcfGL}, and stores an md5 checksum next to that output.
# NOTE(review): judging by the script name this rewrites PL fields as GL
# fields and reorders them; confirm against the ngs-utils source.
# getFile / putFile are not defined in this file.

echo "## $(date) Start $0"

getFile "${vcf}"

# ${stage} and ${checkStage} hold multi-word commands ("module load" /
# "module list" in parameters.csv), so they must stay unquoted to word-split.
${stage} Biopython/${biopythonVersion}
${stage} ngs-utils/${ngsutilsVersion}
${checkStage}

mkdir -p "${genotypedChrVcfGLDir}"

echo "Starting conversion."


#Run PL_to_GL_reorder.py on the input vcf
if python "${EBROOTNGSMINUTILS}/PL_to_GL_reorder.py" \
  --vcf "${vcf}" \
  --out "${genotypedChrVcfGL}"
then
  echo "returncode: $?";
  putFile "${genotypedChrVcfGL}"
  # The checksum is written from inside the output directory so the .md5 file
  # contains a bare file name. The subshell confines the directory change,
  # and a failed cd no longer lets md5sum run in the wrong directory.
  (
    cd "${genotypedChrVcfGLDir}" || { echo "cannot cd to ${genotypedChrVcfGLDir}" >&2; exit 1; }
    bname=$(basename "${genotypedChrVcfGL}")
    md5sum "${bname}" > "${bname}.md5"
  )
  echo "succes moving files";
else
  echo "returncode: $?";
  echo "fail";
fi

echo "Finished conversion."

echo "## $(date) ## $0 Done "

0 comments on commit a79238e

Please sign in to comment.