From 5ea13d9fbd1457e97d4a2d2a168a04dd2eb22d5a Mon Sep 17 00:00:00 2001 From: Feiyu Du Date: Wed, 2 Aug 2023 21:41:09 -0500 Subject: [PATCH] Use Repo for script path, launcher script update with 151x19x10x151, QC metrics update with PCT_TARGET_BASES_GT_1250x --- MyeloseqHD.wdl | 2 +- MyeloseqHDAnalysis.wdl | 4 +- accessory_files/MyeloseqHD.QCMetrics.json | 2 +- dockerfiles/docker-myeloseq/QC_metrics.pl | 2 +- imports.zip | Bin 3347 -> 3340 bytes scripts/launcher.pl | 49 +++++++++++++--------- 6 files changed, 36 insertions(+), 23 deletions(-) diff --git a/MyeloseqHD.wdl b/MyeloseqHD.wdl index 274cfd4..5b7d3ee 100644 --- a/MyeloseqHD.wdl +++ b/MyeloseqHD.wdl @@ -51,7 +51,7 @@ workflow MyeloseqHD { String CustomAnnotationIndex = MyeloSeqHDRepo + "/accessory_files/MyeloseqHD.custom_annotations.vcf.gz.tbi" String CustomAnnotationParameters = "MYELOSEQ,vcf,exact,0,TCGA_AC,MDS_AC,BLACKLIST" - String QC_pl = "/usr/local/bin/QC_metrics.pl" + String QC_pl = MyeloSeqHDRepo + "/dockerfiles/docker-myeloseq/QC_metrics.pl" String xfer_pl = MyeloSeqHDRepo + "/scripts/data_transfer.pl" String DemuxFastqDir = "/storage1/fs1/gtac-mgi/Active/CLE/assay/myeloseqhd/demux_fastq" diff --git a/MyeloseqHDAnalysis.wdl b/MyeloseqHDAnalysis.wdl index 2c87cba..9850a17 100644 --- a/MyeloseqHDAnalysis.wdl +++ b/MyeloseqHDAnalysis.wdl @@ -106,6 +106,7 @@ workflow MyeloseqHDAnalysis { Vaf=MinVaf, Reads=MinReads, Name=Name, + MyeloSeqHDRepo=MyeloSeqHDRepo, queue=Queue, jobGroup=JobGroup } @@ -313,6 +314,7 @@ task clean_variants { task combine_variants { input{ Array[String] Vcfs + String MyeloSeqHDRepo String Cram String CramIndex String refFasta @@ -328,7 +330,7 @@ task combine_variants { command { /usr/local/bin/bcftools merge -F x -m none --force-samples -Oz ${sep=" " Vcfs} | /usr/local/bin/bcftools sort -Oz -o combined.vcf.gz && \ /usr/bin/tabix -p vcf combined.vcf.gz && \ - /usr/bin/python3 /home/fdu/git/cle-myeloseqhd/dockerfiles/docker-myeloseq/filterHaloplex.py -r ${refFasta} --minreadsperfamily ${default='3' MinReadsPerFamily} -m ${default='5' Reads} -d ${default='0.02' Vaf} combined.vcf.gz ${Cram} ${Name} > ${Name}.combined_and_tagged.vcf + /usr/bin/python3 ${MyeloSeqHDRepo}/dockerfiles/docker-myeloseq/filterHaloplex.py -r ${refFasta} --minreadsperfamily ${default='3' MinReadsPerFamily} -m ${default='5' Reads} -d ${default='0.02' Vaf} combined.vcf.gz ${Cram} ${Name} > ${Name}.combined_and_tagged.vcf } runtime { diff --git a/accessory_files/MyeloseqHD.QCMetrics.json b/accessory_files/MyeloseqHD.QCMetrics.json index 566dca1..a0a0fe5 100644 --- a/accessory_files/MyeloseqHD.QCMetrics.json +++ b/accessory_files/MyeloseqHD.QCMetrics.json @@ -20,6 +20,6 @@ "Failed hotspot": "0", "Haplotect sites": ",10", "Haplotect score": ",0.02", - "ASSAY VERSION": "2.2", + "ASSAY VERSION": "2.3", "DISCLAIMER": "This laboratory developed test (LDT) was developed and its performance characteristics determined by the CLIA Licensed Environment laboratory at the McDonnell Genome Institute at Washington University (MGI-CLE, CLIA #26D2092546, CAP #9047655), Dr. David H. Spencer MD, PhD, FCAP, Medical Director. 4444 Forest Park Avenue, Rm 4111 St. Louis, Missouri 63108 (314) 286-1460 Fax: (314) 286-1810. The MGI-CLE laboratory is regulated under CLIA as certified to perform high-complexity testing. Interpretation of sequencing results and case sign out is performed by Pathology and Immunology faculty in the Division of Anatomic and Molecular Pathology (WU-AMP Core Laboratory, CLIA #26D2013203, CAP #7233522) Dr. Fouad Boulos, MD, Medical Director. Pathology Consult Services, 425 S. Euclid Ave, Suite 4711, Mailstop 8024-14-4711, St. Louis, MO 63110 (314)-362-7784 Fax: (314) 362-4080. The AMP core laboratory is regulated under CLIA as certified to perform high-complexity testing. This test has not been cleared or approved by the FDA." } diff --git a/dockerfiles/docker-myeloseq/QC_metrics.pl b/dockerfiles/docker-myeloseq/QC_metrics.pl index 4dcccd5..2d22503 100644 --- a/dockerfiles/docker-myeloseq/QC_metrics.pl +++ b/dockerfiles/docker-myeloseq/QC_metrics.pl @@ -35,7 +35,7 @@ PCT_MAPPED_READS => 'MAPPING/ALIGNING SUMMARY: Mapped reads (%)', MEAN_INSERT_SIZE => 'MAPPING/ALIGNING SUMMARY: Insert length: mean', PCT_TARGET_BASES_GT_250x => 'COVERAGE SUMMARY: Target bases >250x (%)', - PCT_TARGET_BASES_GT_2000x => 'COVERAGE SUMMARY: Target bases >2000x (%)', + PCT_TARGET_BASES_GT_1250x => 'COVERAGE SUMMARY: Target bases >1250x (%)', PCT_TARGET_ALIGNED_READS => 'COVERAGE SUMMARY: Aligned reads in target region (%)', AVG_ALIGN_TARGET_COVERAGE => 'COVERAGE SUMMARY: Average alignment coverage over target region', PCT_LOW_COVERAGE_AMPLICON => 'AMPLICON SUMMARY: Amplicons with low coverage (%)', diff --git a/imports.zip b/imports.zip index 70790f5abcc9ef332af71321230b029c16eb77f3..20ffd2f747e291326cb734ebb82056778125b804 100644 GIT binary patch delta 3280 zcmV;>3@`JO8jKnpP)h>@6aWAK2mtb#0av33ZL3NQ008Yh000&M9Fxur8Gi`?11O-$ zWGJA?WOaB8009I(aSs3lhC&Yj-CKE68#xyLKc7P5r2@>NZre;!3`r-&5fXO8Kp>Tx ztqkju+p^uz7u{`xL!8gPPf{OhNd`MyQ|l^lw~i<2>Al}OB#&v5dQpg4R`c1j%P2YT z`_Uyjm{31T>BWcLt&sSW)PGCS^|NOPK3*7)vf_n(PO`)c2WXoFhNAtjN3YcK-GmHi z_{HtdtT1-_g9KC?Hlh7Jl4b;{700ws6B@d7*K;$q@(Yb!;tr`+P7;rV+3vPB4rj;- zW5_X$Bdz)aiTx;}ZnjN($WdCOf1-v>T$iS4luSB(&!?$fao8Fboqw$^?KM==TS4r5 zZWJ=g3zH^!=hR4gCmK`6@}DAIbpEts4F~HR#sNOL2Na&%l$9+yz^9$Ovvrn6p+5Oh znnl4@7)BY%K!QPReqs=pQ|XYBOlpnYzBTxE>v&v7?{7RS>w342c|;OWf(%p^25ubu z=iSG{lXsuifWjKRB7bhS)?E9%^M0$dwX=4xd&0i9KW^>(`{Vw}=Y}e?Z_#@SbSE*D zHE}=8(18~oQ_@Sz!aYACy!Z?0YoZ?{p;|)iqi?+lA6F_xz2GEi+JL2n}S(l8$PVB+*{Z2v$%rDRwxC0R6 zNG!J8#K2D0O7K%@>S;}n>A;aJ(Y^BXZrtb&zIkycBVF%`>4x;0Gs&zR8WXL>^y6W4 z`j%~F6W15G;LIOhvxBY|(rR_4Wu#6@eek9_&|~Fm>3>tM8Q5U)GTLJR#%=YQhJSh! z$!wmk8U_De)#G-af>m~eaW?1{*m0*Y=nL#*-K%lxuMYq^V;UQvMr=0|O?Bsuf;k7P za>w5(9`v2j-Acz$c56SeyT;O6`&MqAMxd&CG(Z_abV7PY52rvo*gXq$1EwVuMH_{= zLJ#Q%Nq_d^Mq6@sK`nzdog24{A5^e#)Tsrh4-X-XD{_STXreP9*^nk83ex!%B|WI@ zPMlK0tX>q(a*FmC5$9`d53FdE2~gHJU2B$l8BJ$5Wa4YY6N6}<{_T9WfSgmXXK|Dm zWU}P3ycK3P&lZBaXfMQe4*Y555a5BG*0HdR@_(H2Nw;cpz~|K7ei|9dRE1MM-O(sM zp;z?|(pa%8FD;*fU-0?#s%o-08VuPsu43eg2Je>{b!loep9mP;X)T2%$_ zN`*PrKa)8mm0y&Re$KYM9Jtr>65vre4$tZ|;D0`VpaeciK*An+{rYvaJN76|Y(E0c zw12^2u`@DY9K>Vv;~Ka!g)eDgQ)FjB%o+`7&teVVKm*35DNEgXAl6oPm7(X)(O)x% zNJ)@Ik)NWmM=ufn3=|5UP7&TgI6{qrD6=FDZP5A1dpzN#kb3^S>bT;9c;G&IF>4KZ z{cj2rR?&BqGW7b6Mxl3sunSM*@*FMyVt@Z-+2|U@urC=}YOgJ|aO)MCF4y_wMkD4D z8aTxk;Ks)J{J0kTJiWb-M1S61j5`Z8=9ataAsWAV^Cp)yFh!XcP)YkxFLKWT(Y*i= zZ4))bYior?HffejtU>Bpm*7777VVAfVBld0W$Yl&MGkwLl+=fRK+8GRp!sW`PgA!sU9dS;vbmo#QSb8H*ZNoXHHe}l? zHgkmqIeShNlOxPEKsDqF24oqO+$e1(*^|zxqvjw+t(`s8U zx*Lp2JI}jdfni=~kdocLO&z$y)rr^{RWg`$^1h2k73cQfuBjP3R!rto5h*yl! zuTL{p$HZsFya8f)9f){QImZZXmR`k2T37rY;iNt`l@_>O?mAQ)OBIdx$A3G+^^Ha@ zJW$)av36XI%w|%baY}n=Ikn&YvFm(o;@?kyeP!Fr^$ryVSIMFW!>x*CAP+CC_086t z;MID0H}K+LGq8(sLBfxU4H|gJsl@9FH8f|+a4Krj*$dMl#hJkqEOl0zI@hgXmik_o zOCz5fv${*yF#HtuwOEie>wo;1$|Tpq&tWSA9bqh)-Mz-tC$(uCk0up57j&Z~juLO^ zA{`G_LZzg7??J9+*^R|*#I+(M75H`CzAy&d=fj8J3c34!17en)!YBcEI>02HFoa%` zQ4&ZkK%<8^4e0bnax6r)x7et;#C=xhy8H8_(*BSjs3yp3ewv9RWq(gi?(5CXGH*Tt zP^5kKM>uKoQuA0;luDZ58P(_C3o8aoZt;B4i-+HLv`?oqyNy+zE( zoPFX+HjKh`m{@&Jr+>CkC~*fvJmW%nY?0IZK>P?+c4fsA=5y811cS?%FlNUw+22Z!vNyn=B2i2y&(CD?^H%x<5ibn3RBvF{t zPPLLO9amkcpMTOBga95PX5ANI1t^=%;tE!1e|dLgANiw!7p7Hhnz%L%QyO%A%-)%L zayt`{jITaEr2}D(>^EpLNt84#r@pa5@RP5zGq%KLT`e$Xfek z(1|_wTpPkuBno43PmdMrdOcm!iz`KCy`z5L_qhG!NPod>3AySJwhWe827fol5S}`i zTl$|&Z0}*4g=BIt=Iov^ftw{k5J8QVgRMV1TYKoWVpW^1WdV#+6eZxoWJokzWv|6` z$xePnvQ{RpOE!$#aX)Z!znWdmpzl3QPz^iPYQDfCPtdY{pFiR9cy53yK5Bp=AKV)I zscbbm5P#~Czd6EQC}c1kYRw48P1o0Fc_ue!h0KwJ>;cR^&|(70PGrFRAMTwvjlZ%6 zTE0e0ZD*;?PGtB24L_Lqam~-Q-iTCT9Pyjyv$LU5OvAI@Io{wyoRKj>_|M?#BI()?83K}L-juup>%I(VJhgn>37Jsa>7K5rB!JGg*@^mck_W&u_QV%wp z)_?lbr`d|TAF^UsS3V2}KGiJ{V;2w<&v@{oU$8JEKylY%9?gET>+wpZbR|NqD(^w4 z6$>7}-<)Y#Bq3tXeiY$LF$u0~hvM9R?lXKfKl1a51uiYaUU9dAB^JUmoacD}d4^Mw zcbB#SN-m(JD;wzal*tTK{37J5oYK^;nw}a%{@`51Jf<$X8_Pb0JYoo#vRQ*wd4EWW zbN4Ki1AbHrkxr0eU=@r)@f8g{;@7?5$CWtsLhjrrK8qNg>Zg!O#SIqY?x#dD>?-lm zxR?-!REDGF6!}?peu!j%Q?JC#vl?bTGJutEb;sK@cak0D*J@_bxfeNkLSdL?B>%rQ zocpxp@9%_5Tdh_6hDAw={|``00Tu!t0~7!N00;o`nE_X$2W_iL3;+P_le!Er11O-$ OlkE&32AT{20000xEIM-l delta 3277 zcmV;;3^MbK8j~6qP)h>@6aWAK2mqa1$X2Qus?<>o005+s7b1Vk0fJ<4F@j`ucnbgl z1U_*O00f3Y4*=a;dsEvw7XQDWLO074xQpy0w0F3)Zih!{*)10eozC59*^J^SiNP;y zCy>(gv+t4Qhb$?Olt(+88HlaNk#zJsPbp)Xq+S%FmeqXn+n*=GuS9owr+^t(~=l-4ph;{eElb-|zQNJ~mX$zD92;(4E9omc;!qLkC`XOi3>- z3-|np@ZwLTuR%XZLbZtae#cbI*#gVZ%4$qT1^(%O{T$SI$+HWzHR4xh7Dca~4uDQ2(D-SsI6J7YmTMI_Bj_?mXNI{++w zSpXNOShOV`7y#ALnYwn^Kdq+F*-r6r->Zs3P)%8bl(2NlC#{VDv?IRmnRUr1?8F`{-|r-J!2AM@fjgjr9Eru2n;6*1S_ytC zO+BsYF&#LPCAwFB-i;gG!B;QtWTfj|G1-t_b0(RELt~<~n0!2pOy9DtY~uO?7o7RS zOLoxpLRziPw2ahAsSn;%2YRf0E`7>10~>!VUPgONfpJ^Crs+SuiDWiUSB=2GSMj)= zr(l&GVVn)J1$Nvi4Em#CkV(Lp#s=jfHjsf$-Ag0X=2(30$~(n_&THLKbPQ#;_7nSM zEWNdF<+y1Cs;WoZo)N?wq-XS53jGDUX93z_S_1moD9pKeNXJI92sc`Xy92chzjS|Y z+zvmeU;(C63pXELDKM_cG3TQ}XF#$cO+;Ly^D9bvP}!Y0CBm#;6wY#U_81Z8b8Qc- zXp{*xt8u#4EcG&)&Th!yYt$zOXrKOaURXfRDOjjDN(`7Rc`R>*na#67a2M%?Xw88? ztsFu&u)Ml1EaNt(eA4Zd9Kt!Zw;zAj3}y1cDW9}9_2Av9zy3gK8l+I0?LBg2 z)NnZD-GRW9jHKsESj20KWS&BFfm|L>$|+@qvQ*2Zph>N&!f~a-9KD~(9Foc}%Gf<; zTV4*_>v;+As2qpqX&Uf9A3#t7pClk*550W(vf3SclqR+xfn?g?u-F+HFb;p>G5UTD zT$#d`w6H0%vmj=T2DE3fhOeLj}LT!07eqvx~Mkk>y`n6Qezp_FN_Z)g;H z7YMuXL@v+K@^AJR%SP8IhJAm@&{BJCsfAlF&~&-ZCpQ`~m(ai|wg5La&gc8J*yri( zeI)wx_F~*ws4=(PRS(hl)vH%I*1!~HUO*-3L%qm72aWCppwTu_L%gfUFRyjoE#v`E8l1c&^M9BmS+s}W{TdADO&88$i z`~y^0g?y{}4ieU>3kg0VUh*dFt=_EhiZS~2X~yc9_^g;Ws90VH z5HBj{7@^G)ruazfir*rf)W@dM0@ur3hl*pVqVe{4XSlx6$iaUDwY?i_$JNMeCiNMo zw1<{c`^{gw&gUln^Yr%@w!K{MQ0d?*{qtaYt6~}8;SXzlvo)vkYW;CH<;A~d=q|xu&JUhUxE6nY4qF+>2xH0Y?lq?VRGYT(XsFP+pc^f5ICw)B>ANVGOv>hY!CNxchd4#w2mK&Mxd zVn^pvOh zRpi$EGpWqzq1~T5Rd?U&zFWEHn6pnj$%aw5j_e^gEOsAcVBlpofCN`E zU<;`cS0{hOb6IPP#N9jMN3h5%E1odNs-`k9_=O4McMLQ*zoEWxnU>d=_5FZOf!yZ# zGAf+0o?PCvn(bw_TT^YhOSzky!(KADMavHCXeW=*ORwtJ&?L=AGn_M+VGPWw^CLwQ zh|sG%NSar&wL4#ej(!kya|5F`nSG|C%6dSqw)TG>CWe!E@Ac7Vw6wxSZjb;YAu?G0 z??09qPD{p3zm;Nn=_2f}BFBNy>huN5hP1k5!#_oSqt?H~cz&BnY^Eam3-s<3S@~IH z6y3$0+?jLG(sl-Sw$@2kTk{X9O?{!!YrSun2B#E_++j(gu$i4|C0RPIx>7%-GYElt zgus7vU&Ia|Y&HuXSfTyt&5?cNj|N_tR<&v3+B8gQ(DgBUXX43CPCzogDrLeL^H>HyfE-1KqrT+wND0}*mKXdAv^_97>nz5 ztXP-p>6%_#2`cLy_4_{1Vsx0MV77!@bqIf321_4+jCi9(t)*)n;p1D8?y@67c0SBpR-=*W#9AC%@TPD-*XI8^-Op zA9ZrSl3mT9?>$W93_I0oKF1;%(6auVKdbV1Zh#6OHHwf=XN~<-wi+D>^<3W^MlXL9 zH5g8=X2jg4Tj#SplbcgN=3qkh0A`adnZogOV&Wk*J!EjEVbFO3qOV7 zr!U{H`QgoA<^R98KWT>vPa@pMPOMPb$e#f6G_B>Sh`YF8?ydk<<=dD%g&Sn!Np`NfM;yGTyN zoB%1pmtqoJ)(*wt{M-liYKlb8F)UrzmoU9CvB1$X>=hR%SYjb8!+D+ukY_j*$#rQP zpyUEdO71Iq2C4u%(1`iHC1ii#y$kPv6n-MKbX}rhD%`Ca2#kl$!5CTg?^Zr-cAzc2 zvLDWm@{3h^KPHAdaO;3#>e{e2h9#a?Hjb)z#j~D``Y}S8ZRZ3Fg+&zPG zz|TV=(g{)wtb$P}zM`Q={JJ;%xQJ74-Ohcivxw2De!-|z++Z>8eiT$o?bhR?aWNsT zQkfntr^pYi^OGWjI`vAOc~;YzkF3B-b#=!pGk3y{@*^`dbnXQwKM@SGjO72join($out_dir, 'demux_sample_sheet.csv'); -my $ss_fh = IO::File->new(">$dragen_ss") or die "Fail to write to $dragen_ss"; -$ss_fh->print("[Settings]\n"); -$ss_fh->print("AdapterBehavior,trim\n"); -$ss_fh->print("AdapterRead1,AAGATCGGAAGAGCACACGTCTGAACTCC+CAGATCGGAAGAGCACACGTCTGAACTCC+GAGATCGGAAGAGCACACGTCTGAACTCC+TAGATCGGAAGAGCACACGTCTGAACTCC\n"); -$ss_fh->print("AdapterRead2,AAAGATCGGAAGAGCGTCGTGTAGGGAAA+CAAGATCGGAAGAGCGTCGTGTAGGGAAA+GAAGATCGGAAGAGCGTCGTGTAGGGAAA+TAAGATCGGAAGAGCGTCGTGTAGGGAAA\n"); -$ss_fh->print("OverrideCycles,N1Y150;I8N2;U10;N1Y150\n"); -$ss_fh->print("[Data]\n"); -$ss_fh->print("Lane,Sample_ID,Sample_Name,Sample_Project,index,index2\n"); -$ss_fh->print($ds_str); -$ss_fh->close; - -## Sample Index -my $si = File::Spec->join($out_dir, 'sample_index'); -my $si_fh = IO::File->new(">$si") or die "Fail to write to $si"; -$si_fh->print($si_str); -$si_fh->close; - ## Get RunInfoString my $run_xml = File::Spec->join($rundir, 'RunParameters.xml'); unless (-s $run_xml) { @@ -207,6 +188,36 @@ } $xml_fh->close; +## DRAGEN sample sheet +my $num_N; +if ($index1cycle == 19) { + $num_N = 11; +} +elsif ($index1cycle == 10) { + $num_N = 2; +} +else { + die "Invalid index1cycle $index1cycle"; +} + +my $dragen_ss = File::Spec->join($out_dir, 'demux_sample_sheet.csv'); +my $ss_fh = IO::File->new(">$dragen_ss") or die "Fail to write to $dragen_ss"; +$ss_fh->print("[Settings]\n"); +$ss_fh->print("AdapterBehavior,trim\n"); +$ss_fh->print("AdapterRead1,AAGATCGGAAGAGCACACGTCTGAACTCC+CAGATCGGAAGAGCACACGTCTGAACTCC+GAGATCGGAAGAGCACACGTCTGAACTCC+TAGATCGGAAGAGCACACGTCTGAACTCC\n"); +$ss_fh->print("AdapterRead2,AAAGATCGGAAGAGCGTCGTGTAGGGAAA+CAAGATCGGAAGAGCGTCGTGTAGGGAAA+GAAGATCGGAAGAGCGTCGTGTAGGGAAA+TAAGATCGGAAGAGCGTCGTGTAGGGAAA\n"); +$ss_fh->print('OverrideCycles,N1Y150;I8N'.$num_N.";U10;N1Y150\n"); +$ss_fh->print("[Data]\n"); +$ss_fh->print("Lane,Sample_ID,Sample_Name,Sample_Project,index,index2\n"); +$ss_fh->print($ds_str); +$ss_fh->close; + +## Sample Index +my $si = File::Spec->join($out_dir, 'sample_index'); +my $si_fh = IO::File->new(">$si") or die "Fail to write to $si"; +$si_fh->print($si_str); +$si_fh->close; + my $run_info_str = join ',', $runid, $instr, $side, $fcmode, $wftype, $R1cycle, $index1cycle, $index2cycle, $R2cycle; ## Input JSON