diff --git a/data/Digest_sIndel_calls.R b/data/Digest_sIndel_calls.R index 2e8baec..2721cfc 100755 --- a/data/Digest_sIndel_calls.R +++ b/data/Digest_sIndel_calls.R @@ -2,14 +2,14 @@ library(scan2) # Set all of the below to T to generate the table and save it -step1.load=F -step2.rescore=F # make 1% FDR calls and rescue -step3.master_table=F # join all raw calls into a single dataframe -step4.exact_recurrence_filter=F # SHOULDN'T have an effect on indels because of - # the cross-sample filter. -step5.clustered_mut_filter=F # removes a small number of indels +step1.load=T +step2.rescore=T # make 1% FDR calls and rescue +step3.master_table=T # join all raw calls into a single dataframe +step4.exact_recurrence_filter=T # SHOULDN'T have an effect on indels because of + # the cross-sample filter. +step5.clustered_mut_filter=F # removes a small number of indels step6.finalize=F -step7.save=T +step7.save=F if (step1.load) { load('../metadata.rda') diff --git a/data/Digest_sSNV_calls.R b/data/Digest_sSNV_calls.R index d97ccc3..8dd7e4c 100755 --- a/data/Digest_sSNV_calls.R +++ b/data/Digest_sSNV_calls.R @@ -7,7 +7,7 @@ library(data.table) step1.load=F # just load data from disk. takes a while. step2.rescore=F # rescore sites to FDR=1% and do SCAN2 mutation signature rescue step3.make_initial_tables=F # use info from (1) and (2) to make passAB determinations -step4.exact_recurrence_filter=F # filter out SNVs called in more than one donor. +step4.exact_recurrence_filter=T # filter out SNVs called in more than one donor. step5.clustered_mut_filter=F # remove SNVs within 50bp in the same sample # these are mostly dinuc subs. step6.finalize=F @@ -138,12 +138,19 @@ if (step4.exact_recurrence_filter) { print(addmargins(table(recs,donors))) nmut$rec.filter <- donors[nmut$id] > 1 + # This is the correct" way to remove mutations called in 2 + # or more donors. However, the above filter (which does not + # require the calls to be passed, only to be in the list of + # FDR < 50% candidate sites) finds an additional 57 sSNVs that + # recur exactly in another brain with fairly high read support. + # I think it's best to regard these as likely FPs and remove them. cat('Recurrence x donor table (any passA,B,M)\n') - z <- split(nmut$donor[nmut$passA | nmut$passB], - nmut$id[nmut$passA | nmut$passB]) + z <- split(nmut$donor[nmut$passA | nmut$passB | nmut$passM], + nmut$id[nmut$passA | nmut$passB | nmut$passM]) donors <- sapply(z, function(v) length(unique(v))) recs <- sapply(z, length) print(addmargins(table(recs,donors))) + #nmut$rec.filter <- donors[nmut$id] > 1 } diff --git a/figure_5/ExtData_Fig8.R b/figure_5/ExtData_Fig8.R new file mode 100644 index 0000000..bbaee50 --- /dev/null +++ b/figure_5/ExtData_Fig8.R @@ -0,0 +1,68 @@ +#!/usr/bin/env Rscript +library(data.table) + +meta <- fread('../external_data/Roadmap_Epigenomics/EID_metadata_with_H3K27ac.txt') +setkey(meta, EID) + +states <- c("1_TssA", "2_TssAFlnk", "3_TxFlnk", "4_Tx", "5_TxWk", + "6_EnhG", "7_Enh", "8_ZNF/Rpts", "9_Het", "10_TssBiv", "11_BivFlnk", + "12_EnhBiv", "13_ReprPC", "14_ReprPCWk", "15_Quies") + +snv.states <- lapply(states, function(s) { + # Add 1e-4 to p-value since 0 out of 10,000 permutations is better + # interpreted as p < 1e-4 rather than p=0. + snv <- sapply(meta$EID, function(eid) { + load(sprintf('sSNVs_vs_%s_ChromHMM15.SUMMARY.rda', eid)) + ret <- unlist(es[[1]][s,c('enr', 'pval')]) + c(ret[1], -log10(ret[2] + 1e-4)) + }) +}) + +indel.states <- lapply(states, function(s) { + indel <- sapply(meta$EID, function(eid) { + load(sprintf('sIndels_vs_%s_ChromHMM15.SUMMARY.rda', eid)) + ret <- unlist(es[[1]][s,c('enr', 'pval')]) + c(ret[1], -log10(ret[2] + 1e-4)) + }) +}) + + +pf <- function(x, xticks, no.x=FALSE, no.y=FALSE, ...) { + anatomy <- meta[colnames(x)]$ANATOMY + type <- meta[colnames(x)]$TYPE + pfc <- meta[STD_NAME=='Brain_Dorsolateral_Prefrontal_Cortex']$EID + + ylab <- if (no.y) '' else 'Significance: -log10(p-value)' + xlab <- if (no.x) '' else 'Enrichment (or depletion) ratio (obs/exp)' + plot(t(x), + col=ifelse(anatomy=='BRAIN' & type=='PrimaryTissue', + 'red', ifelse(x[2,] > 1, 'black', 'grey')), + bty='n', xaxt='n', yaxt='n', pch=20, + ylim=c(0,4), cex=2, + ylab=ylab, xlab=xlab, ...) + # replot brain points on top of others + points(t(x[,anatomy=='BRAIN' & type=='PrimaryTissue']), col='red', + pch=20, cex=2) + abline(h=1, lty='dotted', col='black', lwd=0.55) + abline(v=1, lty='dotted', col='black', lwd=0.55) + segments(1, 2, x[1, pfc], x[2, pfc], col='red') + if (!no.y) axis(side=2, at=0:4, lwd=0.35, lwd.ticks=0.35) + #if (!no.x) axis(side=1, at=xticks, lwd=0.35, lwd.ticks=0.35) + if (!no.x) axis(side=1, lwd=0.35, lwd.ticks=0.35) +} + +pdf(width=2*3.5, height=2*3.25, pointsize=8, file='ExtData_Fig8_sSNVs.pdf') +layout(matrix(1:16,nrow=4)) +par(mar=c(4,4,2,1)) +for (i in 1:length(states)) + pf(snv.states[[i]], main=states[i]) +dev.off() + + + +pdf(width=2*3.5, height=2*3.25, pointsize=8, file='ExtData_Fig8_sIndels.pdf') +layout(matrix(1:16,nrow=4)) +par(mar=c(4,4,2,1)) +for (i in 1:length(states)) + pf(indel.states[[i]], main=states[i]) +dev.off() diff --git a/figure_5/panel_ef.R b/figure_5/panels_ef.R similarity index 86% rename from figure_5/panel_ef.R rename to figure_5/panels_ef.R index 29e4068..fa6493f 100644 --- a/figure_5/panel_ef.R +++ b/figure_5/panels_ef.R @@ -32,19 +32,23 @@ indel.proximal <- sapply(meta$EID, function(eid) { pf <- function(x, xticks, no.x=FALSE, no.y=FALSE, ...) { anatomy <- meta[colnames(x)]$ANATOMY + type <- meta[colnames(x)]$TYPE + pfc <- meta[STD_NAME=='Brain_Dorsolateral_Prefrontal_Cortex']$EID ylab <- if (no.y) '' else 'Significance: -log10(p-value)' xlab <- if (no.x) '' else 'Enrichment (or depletion) ratio (obs/exp)' plot(t(x), - col=ifelse(anatomy=='BRAIN', 'red', ifelse(x[2,] > 1, 'black', 'grey')), + col=ifelse(anatomy=='BRAIN' & type=='PrimaryTissue', + 'red', ifelse(x[2,] > 1, 'black', 'grey')), bty='n', xaxt='n', yaxt='n', pch=20, xlim=range(xticks), ylim=c(0,4), cex=2, ylab=ylab, xlab=xlab, ...) # replot brain points on top of others - points(t(x[,anatomy=='BRAIN']), col='red', + points(t(x[,anatomy=='BRAIN' & type=='PrimaryTissue']), col='red', pch=20, cex=2) abline(h=1, lty='dotted', col='black', lwd=0.55) abline(v=1, lty='dotted', col='black', lwd=0.55) + segments(1, 2, x[1, pfc], x[2, pfc], col='red') if (!no.y) axis(side=2, at=0:4, lwd=0.35, lwd.ticks=0.35) if (!no.x) axis(side=1, at=xticks, lwd=0.35, lwd.ticks=0.35) }