-
Notifications
You must be signed in to change notification settings - Fork 43
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #586 from npklein/master
GCCXX error fix, use gzipped vcf (phasing pipeline)
- Loading branch information
Showing
5 changed files
with
248 additions
and
0 deletions.
There are no files selected for viewing
67 changes: 67 additions & 0 deletions
67
compute5/Public_RNA-seq_quantification/protocols/MergeFastq.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
#MOLGENIS nodes=1 ppn=1 mem=10gb walltime=01:00:00 | ||
|
||
### variables to help adding to database (have to use weave) | ||
#string sampleName | ||
#string project | ||
### | ||
#list reads1FqGz,reads2FqGz | ||
|
||
|
||
#Function to check if array contains value
#   $1 - NAME of the array variable to search (the name, not its expansion);
#        must not be "array", "seeking" or "element" (local names used here)
#   $2 - value to look for (exact, literal string comparison)
# Returns 0 when the value is present in the array, 1 otherwise.
array_contains () {
    local array="$1[@]"
    local seeking=$2
    # Keep the loop variable local so a caller's own "element" is not clobbered.
    local element
    # ${!array+...} expands to nothing when the array is empty or unset (and is
    # safe under "set -u"), so an empty array never produces a phantom ""
    # element that would falsely match an empty search value.
    for element in ${!array+"${!array}"}; do
        if [[ "$element" == "$seeking" ]]; then
            return 0
        fi
    done
    return 1
}
|
||
|
||
echo "## "$(date)" Start $0"
# project and sampleName are declared as #string parameters in the header.
echo "ID (project-sampleName): ${project}-${sampleName}"

#check modules
module list

# Fetch every input fastq (both mates) before merging.
for file in "${reads1FqGz[@]}" "${reads2FqGz[@]}"; do
    echo "getFile file='$file'"
    getFile "$file"
done

#Create string with input fastq files to merge
#This check needs to be performed because Compute generates duplicate values in array
INPUTFQ1=()
INPUTFQ2=()
echo "merging"
for fq in "${reads1FqGz[@]}"; do
    echo "$fq"
    # If fqFile does not exist in array add it
    array_contains INPUTFQ1 "$fq" || INPUTFQ1+=("$fq")
done
echo "done"
for fq in "${reads2FqGz[@]}"; do
    echo "$fq"
    # De-duplicate the second mate against its OWN list (INPUTFQ2).
    array_contains INPUTFQ2 "$fq" || INPUTFQ2+=("$fq")
done

# The merged files are written next to the first R1 input. The previous
# "dirname reads1FqGz[@]" passed a literal string and therefore always
# resolved to "." instead of the directory of the reads.
outputDir=$(dirname "${reads1FqGz[0]}")
mergedR1="${outputDir}/${sampleName}_R1.fq.gz"
mergedR2="${outputDir}/${sampleName}_R2.fq.gz"

echo "writing to ${mergedR1} and ${mergedR2}"
if cat "${INPUTFQ1[@]}" > "${mergedR1}" && cat "${INPUTFQ2[@]}" > "${mergedR2}"
then
    echo "returncode: $?"
    putFile "${mergedR1}"
    putFile "${mergedR2}"
    echo "succes moving files"
else
    echo "returncode: $?"
    echo "fail"
    # Propagate the failure so dependent steps do not run on missing output.
    exit 1
fi
echo "## "$(date)" ## $0 Done "
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
step,protocol,dependencies | ||
MergeFastq,protocols/MergeFastq.sh, | ||
Kallisto,protocols/Kallisto.sh,MergeFastq |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
######################################################################## | ||
## General parameters for SLURM settings and cluster directory paths | ||
# | ||
queue,ll | ||
defaultInterpreter,#!/bin/bash | ||
stage,module load | ||
checkStage,module list | ||
WORKDIR,/groups/ | ||
root,${WORKDIR} | ||
group,umcg-bios | ||
tmp,tmp04 | ||
resDir,/groups/umcg-wijmenga/tmp04/resources/ | ||
toolDir,/apps/software/ | ||
projectDir,${root}/${group}/${tmp}/projects/${project}/ | ||
fvdProjectDir,${root}/${group}/${tmp}/projects/umcg-fvandijk/projects/ | ||
######################################################################## | ||
## Software and data versions/builds/paths | ||
# | ||
beagleVersion,09Feb16.2b7-Java-1.8.0_45 | ||
beagleJarVersion,09Feb16.2b7 | ||
shapeitVersion,v2.r837-static | ||
GCCVersion,4.9.3-binutils-2.25 | ||
intervaltreeVersion,2.1.0-foss-2015b-Python-2.7.9 | ||
pyvcfVersion,0.6.7-foss-2015b-Python-2.7.9 | ||
samtoolsVersion,1.2-foss-2015b | ||
bedtoolsVersion,2.23.0-foss-2015b | ||
pythonVersion,3.4.1-foss-2015b | ||
biopythonVersion,1.65-foss-2015b-Python-3.4.1 | ||
ngsutilsVersion,16.06.1 | ||
zlibVersion,1.2.8 | ||
bzip2Version,1.0.6-foss-2015b | ||
GLibVersion,2.45.2-foss-2015b | ||
vcftoolsVersion,0.1.12b-goolf-1.7.20-Perl-5.20.2-bare | ||
RVersion,3.2.1-foss-2015b | ||
phaserVersion,f085550 | ||
tabixVersion,0.2.6-goolf-1.7.20 | ||
referenceFastaName,human_g1k_v37 | ||
genomeBuild,b37 | ||
onekgGenomeFasta,${resDir}/${genomeBuild}/indices/${referenceFastaName}.fasta | ||
geneticMapDir,/apps/data/www.shapeit.fr/genetic_map_b37/ | ||
geneticMapChr,${geneticMapDir}/genetic_map_chr${chromosome}_combined_b37.txt | ||
OneKgPhase3VCF,/apps/data/1000G/release/20130502//ALL.wgs.phase3_shapeit2_mvncall_integrated_v5b.20130502.sites.vcf.gz | ||
######################################################################## | ||
## Specific tools paths | ||
# | ||
## Input for Beagle, produced by GATK GenotypeGVCFs | ||
genotypedChrVcfGLDir,${projectDir}/genotypeVcfGL/ | ||
genotypedChrVcfGL,${genotypedChrVcfGLDir}/${project}.chr${chromosome}.genotypeGVCF.gg.vcf.gz | ||
genotypedChrVcfTbi,${genotypedChrVcfGL}.tbi | ||
## Beagle | ||
beagleDir,${projectDir}/beagle/ | ||
genotypedChrVcfBeagleGenotypeProbabilities,${beagleDir}/${project}.chr${chromosome}.beagle.genotype.probs.gg | ||
genotypedChrVcfShapeitInputPrefix,${beagleDir}/${project}.chr${chromosome}.beagle.genotype.probs.gg | ||
## Shapeit | ||
shapeitDir,${projectDir}/shapeit/ | ||
phasedScaffoldDir,/groups/umcg-lld/tmp04/projects/genotypingRelease3/selectionLldeep/lldeepPhased/ | ||
shapeitPhasedOutputPrefix,${shapeitDir}/${project}.chr${chromosome}.shapeit.phased | ||
## phASER | ||
mapq,0 | ||
baseq,0 | ||
phaserDir,${projectDir}/phASER | ||
## genotype concordance | ||
comparisonFileDir,${fvdProjectDir}RNA-seq_rare_variants/comparison_files/ |
63 changes: 63 additions & 0 deletions
63
molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#MOLGENIS walltime=23:59:00 mem=8gb nodes=1 ppn=2

### variables to help adding to database (have to use weave)
#string project
###
#string stage
#string checkStage

#string WORKDIR
#string projectDir
#string beagleDir
#string genotypedChrVcfGL
#string genotypedChrVcfBeagleGenotypeProbabilities
#string genotypedChrVcfShapeitInputPrefix
#string GLibVersion
#string ngsutilsVersion
#string zlibVersion
#string bzip2Version
# Spelled GCCVersion (capital V) to match the key in the parameters file.
#string GCCVersion

echo "## "$(date)" Start $0"

getFile "${genotypedChrVcfGL}"
getFile "${genotypedChrVcfBeagleGenotypeProbabilities}.vcf.gz"

# stage/checkStage hold multi-word commands ("module load" / "module list"),
# so they are intentionally left unquoted to allow word splitting.
${stage} ngs-utils/${ngsutilsVersion}
${stage} GLib/${GLibVersion}
${stage} zlib/${zlibVersion}
${stage} bzip2/${bzip2Version}
# THIS NEEDS TO BE LOADED AFTER NGS-UTILS TO PREVENT GCCXX ERROR
${stage} GCC/${GCCVersion}
${checkStage}

#Run conversion script beagle vcf to shapeit format
if "$EBROOTNGSMINUTILS/prepareGenFromBeagle4_modified20160601/bin/prepareGenFromBeagle4" \
    --likelihoods "${genotypedChrVcfGL}" \
    --posteriors "${genotypedChrVcfBeagleGenotypeProbabilities}.vcf.gz" \
    --threshold 0.995 \
    --output "${genotypedChrVcfShapeitInputPrefix}"
then
    echo "returncode: $?"
    putFile "${genotypedChrVcfShapeitInputPrefix}.gen.gz"
    putFile "${genotypedChrVcfShapeitInputPrefix}.gen.sample"
    putFile "${genotypedChrVcfShapeitInputPrefix}.hap.gz"
    putFile "${genotypedChrVcfShapeitInputPrefix}.hap.sample"
    # Checksums are created from inside the output directory so the .md5
    # files record bare file names rather than full paths.
    cd "${beagleDir}" || exit 1
    for suffix in gen.gz gen.sample hap.gz hap.sample; do
        bname=$(basename "${genotypedChrVcfShapeitInputPrefix}.${suffix}")
        md5sum "${bname}" > "${bname}.md5"
    done
    cd - || exit 1
    echo "succes moving files"
else
    echo "returncode: $?"
    echo "fail"
    # Exit non-zero so the job is not reported as finished successfully.
    exit 1
fi

echo "## "$(date)" ## $0 Done "
|
52 changes: 52 additions & 0 deletions
52
molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertPLtoGL.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#MOLGENIS walltime=23:59:00 mem=8gb nodes=1 ppn=2

### variables to help adding to database (have to use weave)
#string project
###
#string stage
#string checkStage

#string WORKDIR
#string projectDir
#string genotypedChrVcfGLDir
#string genotypedChrVcfGL
#string vcf
#string biopythonVersion
#string ngsutilsVersion

echo "## "$(date)" Start $0"

getFile "${vcf}"

# stage/checkStage hold multi-word commands ("module load" / "module list"),
# so they are intentionally left unquoted to allow word splitting.
${stage} Biopython/${biopythonVersion}
${stage} ngs-utils/${ngsutilsVersion}
${checkStage}

mkdir -p "${genotypedChrVcfGLDir}"

echo "Starting conversion."

#Run PL_to_GL_reorder.py on the input vcf; output goes to genotypedChrVcfGL
if python "$EBROOTNGSMINUTILS/PL_to_GL_reorder.py" \
    --vcf "${vcf}" \
    --out "${genotypedChrVcfGL}"
then
    echo "returncode: $?"
    putFile "${genotypedChrVcfGL}"
    # Checksum is created from inside the output directory so the .md5 file
    # records the bare file name rather than the full path.
    cd "${genotypedChrVcfGLDir}" || exit 1
    bname=$(basename "${genotypedChrVcfGL}")
    md5sum "${bname}" > "${bname}.md5"
    cd - || exit 1
    echo "succes moving files"
else
    echo "returncode: $?"
    echo "fail"
    # Exit non-zero so a failed conversion is not followed by
    # "Finished conversion." and a zero exit status.
    exit 1
fi

echo "Finished conversion."

echo "## "$(date)" ## $0 Done "
|