-
Notifications
You must be signed in to change notification settings - Fork 4
/
example.sh
105 lines (90 loc) · 2.53 KB
/
example.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/bin/bash
if [ $1 ]; then
IDX=$1
else
IDX=1
fi
source ~/init.sh
conda activate twas_sim
hapmap=HAPMAP_SNPS/
loci=ind_loci.bed
genes=glist-hg19.nodupe.autosome
# PARAMETERS
# !!! change to point to results/output directory !!!
odir=/scratch1/xwang505/TWAS/output/
# !!! change to point to plink installation !!!
plink=/project/nmancuso_8/xwang505/tools/plink2
# !!! change to point to reference panel dir !!!
okg=/project/nmancuso_8/data/LDSC/1000G_EUR_Phase3_plink/
# !!! change to values for your study !!!
N=100000 # N GWAS
NGE=500 # N EQTL
MODEL=1 # eQTL model; see sim.py for details
H2G=0.1 # eQTL h2g
H2GE=0.001 # variance explained in complex trait; 0 (null) to 0.01 (huge effect) are reasonable values
LINEAR_MODEL=enet
# get genotype
while [[ ! -e $odir/twas_sim_loci${IDX}.bim ]]
do
echo "attempting ${IDX}"
params=`shuf -n 1 $loci`
set -- junk $params
shift
#1 10583 1892607
# get the region bounds +-500kb
chr=$1
numchr=`echo $chr | sed 's/chr//'`
locus_start=`python -c "print(int(max($2 - 500e3, 1)))"`
locus_stop=`python -c "print( int(int($3) + 500e3))"`
# grab geno from reference panel
# keep only common hapmap3 variants
$plink \
--bfile $okg/1000G.EUR.QC.${numchr} \
--chr $numchr \
--from-bp $locus_start \
--to-bp $locus_stop \
--make-bed \
--out $odir/twas_sim_loci${IDX} \
--snps-only \
--hwe midp 1e-5 \
--geno 0.01 \
--maf 0.01 \
--allow-no-sex \
--memory 2048 \
--keep EUR.samples \
--extract $hapmap/hm.$numchr.snp \
--force-intersect
# make sure we have at least MIN_SNPS in our data
if [[ `wc -l $odir/twas_sim_loci${IDX}.bim | awk '{print $1}'` -lt $MIN_SNPS ]];
then
rm $odir/twas_sim_loci${IDX}.{bim,bed,fam}
fi
$plink \
--bfile $odir/twas_sim_loci${IDX} \
--keep EUR1.samples \
--make-bed \
--out $odir/twas_sim_sample1_loci${IDX}
$plink \
--bfile $odir/twas_sim_loci${IDX} \
--keep EUR2.samples \
--make-bed \
--out $odir/twas_sim_sample2_loci${IDX}
done
# run simulation
echo "running fast simulation"
python sim.py \
$odir/twas_sim_sample1_loci${IDX} \
--eqtl-prefix $odir/twas_sim_sample2_loci${IDX} \
--test-prefix $odir/twas_sim_sample2_loci${IDX} \
--ngwas $N \
--nqtl $NGE \
--ncausal $MODEL \
--eqtl-h2 $H2G \
--fast-gwas-sim \
--IDX ${IDX}\
--h2ge $H2GE \
--linear-model $LINEAR_MODEL \
--seed ${IDX} \
--output $odir/twas_sim_loci${IDX}
# remove temporary genotype data
rm ${odir}/twas_sim_loci${IDX}.{bed,bim,fam}