forked from dna-seq/dna-seq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dvc.yaml
70 lines (70 loc) · 3.33 KB
/
dvc.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# DVC pipeline: docker-compose helpers, reference-data preparation
# (Ensembl 103 genome / VEP cache / VEP plugins, GWAS annotation tables)
# and OpenCRAVAT module installation.
stages:
  # --- Service helpers (both marked frozen) --------------------------------
  start:
    desc: "convenience stage to start services"
    frozen: true
    # NOTE(review): `up` without `-d` stays attached to the terminal, so this
    # stage does not return while the services run - confirm that is intended.
    cmd: docker-compose up

  stop:
    desc: "convenience stage to stop services"
    frozen: true
    cmd: docker-compose down

  # --- Ensembl 103 reference data -------------------------------------------
  # Decompress the GRCh38 primary assembly (keeping the .gz via -k) and build
  # the samtools .fai index and .dict sequence dictionary next to it.
  prepare_genome:
    deps:
      - data/ensembl/103/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
    # Folded scalar: every line below is joined with a single space.
    cmd: >-
      gunzip -k data/ensembl/103/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz &&
      samtools faidx data/ensembl/103/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa &&
      samtools dict data/ensembl/103/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa
      -o data/ensembl/103/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa.dict
    outs:
      - data/ensembl/103/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa
      - data/ensembl/103/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa.fai
      - data/ensembl/103/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa.dict

  # Unpack the VEP cache tarball into the cache directory.
  prepare_vep_cache:
    deps:
      - data/ensembl/103/cache/homo_sapiens_vep_103_GRCh38.tar.gz
    cmd: >-
      tar -zxf data/ensembl/103/cache/homo_sapiens_vep_103_GRCh38.tar.gz
      -C data/ensembl/103/cache/
    outs:
      - data/ensembl/103/cache/homo_sapiens

  # Unzip the VEP plugins archive and rename the extracted release directory
  # to the stable path `plugins`.
  prepare_vep_plugins:
    deps:
      - data/ensembl/103/103.zip
    cmd: >-
      unzip -u data/ensembl/103/103.zip -d data/ensembl/103/ &&
      mv data/ensembl/103/VEP_plugins-release-103 data/ensembl/103/plugins
    outs:
      - data/ensembl/103/plugins/

  # --- GWAS annotation tables -----------------------------------------------
  # Drop the header row and rows whose 2nd field matches "NA", sort by
  # field 2 then numerically by field 3, collapse "tab + spaces" into a
  # single tab, and recompress with bgzip.
  prepare_annotations_digenet:
    deps:
      - data/gwas/annotations/all_variant_disease_pmid_associations.tsv.gz
    # The tab separator for `sort -t` is produced with printf instead of the
    # $'\t' form, which is not understood by every sh implementation.
    # NOTE(review): the pipeline reports only bgzip's exit status; a failing
    # gunzip/awk/sort would go unnoticed unless pipefail is active.
    cmd: >-
      gunzip -c data/gwas/annotations/all_variant_disease_pmid_associations.tsv.gz |
      awk '($1 ~ /^snpId/ || $2 ~ /NA/) {next} {print $0}' |
      sort -t "$(printf '\t')" -k2,2 -k3,3n |
      awk '{ gsub (/\t +/, "\t", $0); print}' |
      bgzip -c > data/gwas/annotations/all_variant_disease_pmid_associations_final.tsv.gz
    outs:
      - data/gwas/annotations/all_variant_disease_pmid_associations_final.tsv.gz

  # No processing: the command only prints a message. The stage exists to
  # declare the ClinVar VCF and its .tbi index as dependencies.
  prepare_annotations_clinvar:
    deps:
      - data/gwas/annotations/clinvar.vcf.gz
      - data/gwas/annotations/clinvar.vcf.gz.tbi
    cmd: echo "Clinvars annotations prepared"

  # https://www.ebi.ac.uk/gene2phenotype/downloads/ has no E-Tag!
  # Concatenate the gene2phenotype panels into AllG2P.csv, keeping the header
  # line of the first file only (awk: NR == 1 or FNR > 1).
  prepare_annotations_gene2phenotype:
    deps:
      - data/gwas/annotations/gene2phenotype/DDG2P.csv
      - data/gwas/annotations/gene2phenotype/CancerG2P.csv
      - data/gwas/annotations/gene2phenotype/EyeG2P.csv
      - data/gwas/annotations/gene2phenotype/SkinG2P.csv
    # Disabled extra step, kept for reference only (it must stay outside the
    # folded scalar, where `#` would be command text, not a YAML comment):
    #   && for f in data/gwas/annotations/gene2phenotype/*.gz ; do gunzip -c "$f" > data/gwas/annotations/gene2phenotype/"${f%.*}" ; done
    #
    # Inputs are listed explicitly (same order a `*.csv` glob yields for these
    # four names) so the command reads exactly the declared deps and never
    # picks up an untracked CSV that happens to sit in the directory.
    cmd: >-
      rm -f data/gwas/annotations/gene2phenotype/AllG2P.csv &&
      awk '(NR == 1) || (FNR > 1)'
      data/gwas/annotations/gene2phenotype/CancerG2P.csv
      data/gwas/annotations/gene2phenotype/DDG2P.csv
      data/gwas/annotations/gene2phenotype/EyeG2P.csv
      data/gwas/annotations/gene2phenotype/SkinG2P.csv
      > data/gwas/annotations/gene2phenotype/AllG2P.csv
    outs:
      - data/gwas/annotations/gene2phenotype/AllG2P.csv

  # --- OpenCRAVAT modules ---------------------------------------------------
  # Neither stage declares deps or outs.
  install_opencravat:
    cmd: >-
      oc module install-base &&
      oc module install -y
      clinvar clinvar_acmg biogrid uk10k_cohort uniprot pubmed provean
      polyphen2 cardioboost civic civic_gene cosmic cosmic_gene go sift
      ensembl_regulatory_build dgi cvdkp

  install_cancer:
    # `-y` added to match install_opencravat, so the install does not wait
    # for an interactive confirmation.
    cmd: >-
      oc module install -y
      chasmplus cgc cgl cancer_genome_interpreter cancer_hotspots civic