
Commit

Merge pull request #2 from taylor-lab/feature/facets2N
Feature/facets2n
rptashkin authored Apr 30, 2020
2 parents ba49e98 + c3013ea commit 1399b5c
Showing 11 changed files with 161 additions and 68 deletions.
12 changes: 7 additions & 5 deletions DESCRIPTION
@@ -1,19 +1,19 @@
Package: facetsSuite
Type: Package
Title: Functions to run and parse output from FACETS
Version: 1.0.1
Version: 2.0.5
Authors@R: person(given = "Philip",
family = "Jonsson",
role = c("aut", "cre"),
email = "[email protected]")
URL: https://github.com/mskcc/facetsSuite
URL: https://github.com/mskcc/facets-suite
Description: This package provides functions that wrap the FACETS package for allele-specific copy-number analysis as well as functions to perform post-hoc analyses thereof.
License: MIT + file LICENSE
RoxygenNote: 7.0.2
LazyData: true
Encoding: UTF-8
Depends:
R (>= 3.4.0), pctGCdata (>= 0.2.0)
R (>= 3.4.0), pctGCdata (>= 0.3.0)
Imports:
data.table (>= 1.11.8),
dplyr (>= 0.8.3),
@@ -25,9 +25,11 @@ Imports:
egg (>= 0.4.2),
scales (>= 1.0.0),
argparse (>= 1.1.1),
tibble (>= 2.1.3)
tibble (>= 2.1.3),
facets (>= 0.5.6),
facets2n (>= 0.2.6)
NeedsCompilation: no
Packaged: 2020-02-05 12:05:36 UTC; rptashkin
Packaged: 2020-04-29 20:11:41 UTC; rptashkin
Suggests:
covr (>= 3.2.0),
testthat (>= 2.1.0)
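
The updated DESCRIPTION now imports both copy-number engines (facets >= 0.5.6 and facets2n >= 0.2.6) and points at the renamed mskcc/facets-suite repository. A minimal installation sketch follows; the GitHub locations of facets and facets2n are assumptions, not taken from this diff.

    # Hedged sketch: install the two FACETS engines before this package.
    # Repository paths below are assumed, not stated in the DESCRIPTION.
    install.packages("remotes")
    remotes::install_github("mskcc/facets")        # facets (>= 0.5.6)
    remotes::install_github("rptashkin/facets2n")  # facets2n (>= 0.2.6), path assumed
    remotes::install_github("mskcc/facets-suite")  # this package, per the new URL field
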
4 changes: 2 additions & 2 deletions R/ccf-annotate-maf.R
@@ -61,10 +61,10 @@ ccf_annotate_maf = function(maf,
maf[, c('ccf_Mcopies', 'ccf_Mcopies_lower', 'ccf_Mcopies_upper', 'ccf_Mcopies_prob95', 'ccf_Mcopies_prob90') :=
estimate_ccf(purity, tcn, tcn - lcn, t_alt_count, t_depth), by = seq_len(nrow(maf))]
maf[, c('ccf_1copy', 'ccf_1copy_lower', 'ccf_1copy_upper', 'ccf_1copy_prob95', 'ccf_1copy_prob90') :=
estimate_ccf(purity, tcn, tcn - lcn, t_alt_count, t_depth), by = seq_len(nrow(maf))]
estimate_ccf(purity, tcn, 1, t_alt_count, t_depth), by = seq_len(nrow(maf))]
maf[, c('ccf_expected_copies', 'ccf_expected_copies_lower', 'ccf_expected_copies_upper',
'ccf_expected_copies_prob95', 'ccf_expected_copies_prob90') :=
estimate_ccf(purity, tcn, tcn - lcn, t_alt_count, t_depth), by = seq_len(nrow(maf))]
estimate_ccf(purity, tcn, expected_alt_copies, t_alt_count, t_depth), by = seq_len(nrow(maf))]
as.data.frame(maf)
}
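
The fix above stops the one-copy and expected-copy CCF columns from reusing the `tcn - lcn` mutant copy number: `ccf_1copy` now assumes a single mutated copy and `ccf_expected_copies` uses the precomputed `expected_alt_copies`. A hedged illustration of the underlying VAF relationship those calls differ on; `estimate_ccf()` internals are not shown in this diff, so the formula below is the standard expectation, not a quote of the package code.

    # Expected tumor VAF of a clonal mutation carried on `mutant_copies` alleles,
    # in a tumor of the given purity and total copy number (normal assumed diploid).
    expected_vaf = function(purity, tcn, mutant_copies) {
      purity * mutant_copies / (purity * tcn + 2 * (1 - purity))
    }
    expected_vaf(0.6, 4, 1)  # 0.1875 -- the ccf_1copy assumption
    expected_vaf(0.6, 4, 3)  # 0.5625 -- e.g. tcn - lcn with tcn = 4, lcn = 1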

76 changes: 62 additions & 14 deletions R/check-fit.R
@@ -54,7 +54,7 @@ check_fit = function(facets_output,
dipLogR = facets_output$dipLogR
alballogr = as.numeric(facets_output$alballogr[, 1])
purity = facets_output$purity
mafr_thresh = facets_output$mafr_thresh

fcna_output = calculate_fraction_cna(segs, facets_output$ploidy, genome, algorithm)
wgd = fcna_output$genome_doubled

@@ -66,9 +66,9 @@
setkey(segs, chrom, start, end)
snps = as.data.table(snps)
setkey(snps, chrom, maploc)

# Label clonal segments
segs[, clonal := cf >= purity - .1]
segs[, clonal := cf >= (purity * 0.8)]

# Subset on autosomes
auto_segs = segs[chrom < 23]
@@ -92,6 +92,34 @@
n_dip_imbal_segs = nrow(dip_imbal_segs)
frac_dip_imbal_segs = sum(dip_imbal_segs$length)/sum(auto_segs$length)

#############################
### Other metrics
#############################
## fraction genome unaltered (2-1) -- too high --> low purity?
segs_unaltered = auto_segs[tcn == 2 & lcn == 1, ]
n_segs_unaltered = nrow(segs_unaltered)
frac_genome_unaltered = sum(segs_unaltered$length)/sum(auto_segs$length)

segs_below_dipLogR = auto_segs[cnlr.median.clust < dipLogR] # part of ploidy filter
n_segs_below_dipLogR = nrow(segs_below_dipLogR) # part of ploidy filter
frac_below_dipLogR = sum(segs_below_dipLogR$length)/sum(auto_segs$length)

segs_balanced_odd_tcn = auto_segs[mafR <= 0.025 & (tcn %% 2) != 0 & clonal ==T & tcn < 8,]
n_segs_balanced_odd_tcn = nrow(segs_balanced_odd_tcn)
frac_balanced_odd_tcn = sum(segs_balanced_odd_tcn$length)/sum(auto_segs$length)

segs_imbalanced_diploid_cn = auto_segs[clonal==T & mafR > 0.1 & !is.na(lcn) & lcn != 0 & (as.double(tcn) / lcn) == 2, ]
n_segs_imbalanced_diploid_cn = nrow(segs_imbalanced_diploid_cn)
frac_imbalanced_diploid_cn = sum(segs_imbalanced_diploid_cn$length)/sum(auto_segs$length)

segs_lcn_greater_mcn = auto_segs[clonal==T & lcn < mcn,]
n_segs_lcn_greater_mcn = nrow(segs_lcn_greater_mcn)
frac_lcn_greater_mcn = sum(segs_lcn_greater_mcn$length)/sum(auto_segs$length)

mafr_median_all = median(auto_segs$mafR, na.rm = T)
mafr_median_clonal = median(auto_segs[clonal==T, ]$mafR, na.rm = T)
mafr_n_gt_1 = nrow(auto_segs[mafR > 1, ])

# Number of high-level amplifications and homozygous deletions
# Clonal homdels, how much of the genome do they represent
n_amps = nrow(segs[tcn >= 10])
@@ -110,28 +138,32 @@ check_fit = function(facets_output,
n_cnlr_clusters = length(unique(segs$cnlr.median.clust))

# Number of segments with lcn==NA
n_lcn_na = nrow(segs[is.na(lcn)])
n_lcn_na = nrow(auto_segs[is.na(lcn)])
frac_lcn_na = sum(auto_segs[is.na(lcn)]$length)/sum(auto_segs$length)

# Number of segments with LOH
loh_segs = auto_segs[lcn == 0,]
n_loh = nrow(loh_segs)
frac_loh = sum(loh_segs$length)/sum(auto_segs$length)

# Check fraction of subclonal events
subclonal_segs = segs[clonal == F, ]
# Check fraction of subclonal events within autosomal chromosomes
subclonal_segs = auto_segs[clonal == F, ]
n_segs_subclonal = nrow(subclonal_segs)
frac_segs_subclonal = sum(subclonal_segs$length)/sum(segs$length)

frac_segs_subclonal = sum(subclonal_segs$length)/sum(auto_segs$length)
# Compile SNP statistics
n_snps = nrow(snps)
het_snps = snps[het == 1]
n_het_snps = nrow(het_snps)
frac_het_snps = n_het_snps/n_snps
n_het_snps_hom_in_tumor_1pct = nrow(het_snps[ vafT < 0.01 | vafT > 0.99, ])
n_het_snps_hom_in_tumor_5pct = nrow(het_snps[ vafT < 0.05 | vafT > 0.95, ])

n_snps_with_300x_in_tumor = nrow(snps[rCountT > 300, ])
n_het_snps_with_300x_in_tumor = nrow(het_snps[rCountT > 300, ])
n_het_snps_hom_in_tumor_1pct = nrow(het_snps[ (vafT < 0.01 | vafT > 0.99), ])
n_het_snps_hom_in_tumor_5pct = nrow(het_snps[ (rCountN > 35 & rCountT > 35) & (vafT < 0.05 | vafT > 0.95), ])
frac_het_snps_hom_in_tumor_1pct = n_het_snps_hom_in_tumor_1pct/n_het_snps
frac_het_snps_hom_in_tumor_5pct = n_het_snps_hom_in_tumor_5pct/n_het_snps

# Check the mean/standard deviation of the cnlr
snps[, cnlr_residual := cnlr - median(cnlr), by = seg]
mean_cnlr_residual = mean(snps$cnlr_residual)
@@ -173,6 +205,7 @@ check_fit = function(facets_output,
output = list(
dipLogR_flag = dipLogR_flag,
n_alternative_dipLogR = n_alt_dipLogR,
wgd = wgd,
n_dip_bal_segs = n_dip_bal_segs,
frac_dip_bal_segs = frac_dip_bal_segs,
n_dip_imbal_segs = n_dip_imbal_segs,
@@ -190,9 +223,19 @@ check_fit = function(facets_output,
frac_loh = frac_loh,
n_segs_subclonal = n_segs_subclonal,
frac_segs_subclonal = frac_segs_subclonal,
n_segs_below_dipLogR = n_segs_below_dipLogR,
frac_below_dipLogR = frac_below_dipLogR,
n_segs_balanced_odd_tcn = n_segs_balanced_odd_tcn,
frac_balanced_odd_tcn = frac_balanced_odd_tcn,
n_segs_imbalanced_diploid_cn = n_segs_imbalanced_diploid_cn,
frac_imbalanced_diploid_cn = frac_imbalanced_diploid_cn,
n_segs_lcn_greater_mcn = n_segs_lcn_greater_mcn,
frac_lcn_greater_mcn = frac_lcn_greater_mcn,
n_snps = n_snps,
n_het_snps = n_het_snps,
frac_het_snps = frac_het_snps,
n_snps_with_300x_in_tumor = n_snps_with_300x_in_tumor,
n_het_snps_with_300x_in_tumor = n_het_snps_with_300x_in_tumor,
n_het_snps_hom_in_tumor_1pct = n_het_snps_hom_in_tumor_1pct,
n_het_snps_hom_in_tumor_5pct = n_het_snps_hom_in_tumor_5pct,
frac_het_snps_hom_in_tumor_1pct = frac_het_snps_hom_in_tumor_1pct,
@@ -206,22 +249,27 @@ check_fit = function(facets_output,
n_segs_discordant_both = discordant_stats$n_discordant_both,
frac_discordant_both = discordant_stats$length_discordant_both/evaluable_length$both,
n_segs_icn_cnlor_discordant = n_icn_cnlor_discordant,
frac_icn_cnlor_discordant = frac_icn_cnlor_discordant
frac_icn_cnlor_discordant = frac_icn_cnlor_discordant,
mafr_median_all = mafr_median_all,
mafr_median_clonal = mafr_median_clonal,
mafr_n_gt_1 = mafr_n_gt_1
)



# If input MAF is provided add some stats based on this
if (!is.null(maf)) {
maf = as.data.table(maf)

maf[, `:=` (
Chromosome = ifelse(Chromosome == 'X', 23, Chromosome),
t_var_freq = t_alt_count/(t_alt_count+t_ref_count)
)][, Chromosome := as.integer(Chromosome)]
)][, Chromosome := as.integer(Chromosome)]
setkey(maf, Chromosome, Start_Position, End_Position)
maf = foverlaps(maf, segs, mult = 'first', nomatch = NA,
by.x = c('Chromosome', 'Start_Position', 'End_Position'),
by.y = c('chrom', 'start', 'end'))

# Median mutation VAF at clonal 2:1 segments
output$dip_median_vaf = maf[clonal == TRUE & mcn == 1 & lcn == 1][, median(t_var_freq)]

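Taken together, the new segment- and SNP-level metrics give `check_fit()` more handles for flagging questionable fits (possible low purity, ploidy misestimation, noisy chrX, het SNPs that look homozygous in the tumor). A hedged usage sketch follows; the full `check_fit()` signature is not visible in this diff, so the argument names are assumptions based on the function body above.

    # Hypothetical call -- argument names inferred from the code above.
    qc = check_fit(facets_output, genome = 'hg19', algorithm = 'em')
    qc$wgd                           # whole-genome doubling call from calculate_fraction_cna()
    qc$frac_below_dipLogR            # fraction of autosome below dipLogR (ploidy filter)
    qc$frac_balanced_odd_tcn         # balanced clonal segments with odd total copy number
    qc$mafr_median_clonal            # median allelic imbalance of clonal segments
    qc$n_het_snps_hom_in_tumor_5pct  # het SNPs that appear homozygous in the tumor
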
31 changes: 15 additions & 16 deletions R/plot-facets.R
@@ -65,8 +65,8 @@ cnlr_plot = function(facets_data,

ymin = floor(min(segs$cnlr.median, na.rm = T))
ymax = ceiling(max(segs$cnlr.median, na.rm = T))
if (ymin > -3) ymin = -3
if (ymax < 3) ymax = 3
if (ymin > -2) ymin = -2
if (ymax < 2) ymax = 2

if (adjust_dipLogR) {
snps$cnlr = snps$cnlr - dipLogR
@@ -240,9 +240,14 @@ cf_plot = function(facets_data,

starts = cumsum(c(1, segs$num.mark))[seq_along(segs$num.mark)]
ends = cumsum(c(segs$num.mark))
my_starts = snps[starts, 'chr_maploc']
my_ends = snps[ends, 'chr_maploc']

my_starts = snps[starts, 'chr_maploc']
my_ends = snps[ends, 'chr_maploc']

cf = ggplot(segs) +
geom_rect(aes(xmin = starts, xmax = ends, ymax = 1, ymin = 0),
geom_rect(aes(xmin = my_starts, xmax = my_ends, ymax = 1, ymin = 0),
fill = cols, col = 'white', size = 0) +
scale_x_continuous(breaks = mid, labels = names(mid), expand = c(.01, 0)) +
scale_y_continuous(expand = c(0, 0)) +
@@ -277,8 +282,8 @@ icn_plot = function(facets_data,
genome = match.arg(genome, c('hg19', 'hg18', 'hg38'), several.ok = FALSE)
method = match.arg(method, c('em', 'cncf'), several.ok = FALSE)

snps = facets_data$snps
segs = facets_data$segs
snps = facets_data$snps %>% data.table
segs = facets_data$segs %>% data.table
if (!plotX) {
snps = subset(snps, chrom < 23)
segs = subset(segs, chrom < 23)
@@ -310,15 +315,19 @@ icn_plot = function(facets_data,
my_tcn_ends = snps[ends, c('chr_maploc', 'tcn')]
my_lcn_starts = snps[starts, c('chr_maploc', 'lcn')]
my_lcn_ends = snps[ends, c('chr_maploc', 'lcn')]

axis_breaks = c(0:5, 5 + (sort(unique(segs[tcn >5, tcn])) - 5)/3)
axis_labels = c(0:5, sort(unique(segs[tcn >5, tcn])))

icn = ggplot(segs) +
geom_segment(col = 'red', size = 1,
aes(x = my_lcn_starts$chr_maploc, xend = my_lcn_ends$chr_maploc,
y = my_lcn_starts$lcn, yend = my_lcn_ends$lcn)) +
geom_segment(col = 'black', size = 1,
geom_segment(col = 'black', size = 0.8,
aes(x = my_tcn_starts$chr_maploc, xend = my_tcn_ends$chr_maploc,
y = my_tcn_starts$tcn, yend = my_tcn_ends$tcn)) +
# scale_y_continuous(breaks=c(0:5, 5 + seq_len(35) / 3), labels = 0:40, limits = c(0, NA)) +
scale_y_continuous(breaks=axis_breaks, labels=axis_labels) +
scale_x_continuous(breaks = mid, labels = names(mid), expand = c(.01, 0)) +
labs(x = NULL, y = my_ylab) +
theme_bw() +
Expand All @@ -330,16 +339,6 @@ icn_plot = function(facets_data,
panel.grid.minor.y = element_blank(),
plot.margin = unit(c(0, 1, 0, 0), 'lines'))

max_tcn = max(segs$tcn, na.rm = T)
if (max_tcn > 10) {
top = 5*round(43/5)
icn = icn +
scale_y_continuous(breaks = c(0:10, seq(15, 45, 5)), labels = c(0:10, seq(15, 45, 5)), limits = c(0, max_tcn))
} else {
icn = icn +
scale_y_continuous(breaks = 0:max_tcn, labels = 0:max_tcn, limits = c(0, max_tcn))
}

if (!is.null(highlight_gene)) {
if (is.character(highlight_gene)) {
highlight_gene = get_gene_position(gene_pos)
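
The rewritten `icn_plot()` replaces the old fixed break scheme with a compressed y-axis: copy numbers up to 5 plot linearly and anything above 5 is drawn at `5 + (tcn - 5)/3`, so a focal high-level amplification no longer flattens the rest of the profile. A small standalone illustration of the break construction (example values only):

    tcn_values = c(0:5, 8, 12, 30)                  # example total copy numbers
    high = sort(unique(tcn_values[tcn_values > 5]))
    axis_breaks = c(0:5, 5 + (high - 5) / 3)        # where each tcn is drawn
    axis_labels = c(0:5, high)                      # what the tick label says
    rbind(axis_breaks, axis_labels)
    # e.g. tcn = 30 is plotted at y = 5 + 25/3, roughly 13.3, rather than 30
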
5 changes: 3 additions & 2 deletions R/read-snp-matrix.R
@@ -45,7 +45,7 @@

read_snp_matrix_facets2n = function(filename, err.thresh=Inf, del.thresh=Inf, perl.pileup=FALSE,
MandUnormal=FALSE, spanT=0.2, spanA=0.2, spanX=0.2, gbuild="hg19",
ReferencePileupFile=NULL, ReferenceLoessFile=NULL, MinOverlap=0.9, useMatchedX=FALSE){
ReferencePileupFile=NULL, ReferenceLoessFile=NULL, MinOverlap=0.9, useMatchedX=FALSE, refX=FALSE){
#' #' Read in the snp-pileup generated SNP read count matrix file for facets2n processing
#' @importFrom utils read.csv
#' @param filename counts file from snp-pileup
@@ -62,13 +62,14 @@ read_snp_matrix_facets2n = function(filename, err.thresh=Inf, del.thresh=Inf, pe
#' @param ReferenceLoessFile (character) Filepath to an optional loess data, generated using the facets2n package, of one or more reference normals. The number of normals in this data should match that in the ReferencePileupFile, and should be in the same order.
#' @param MinOverlap (numeric) Mininum overlap fraction of loci between a tumor pileup and reference pileup data.
#' @param useMatchedX (logical) Is the matched normal to be used for ChrX normalization?
#' @param refX (logical) Use sex matched reference normal for chrX normalization
#' @return A dataframe of pileup depth values for Tumor and Matched Normal if MandUnormal is FALSE. Else, a list of data frame with pileup depth values of Tumor, matched Normal, and a best unmatched normal, and the associated span values.
#' @source \code{snp-pileup} is part of \href{www.github.com/mskcc/facets}{FACETS}.
#' @import facets2n
#' @importFrom pctGCdata getGCpct
#' @export
if (MandUnormal){
facets2n::readSnpMatrix(filename, MandUnormal = MandUnormal, ReferencePileupFile = ReferencePileupFile, ReferenceLoessFile = ReferenceLoessFile, useMatchedX = useMatchedX)
facets2n::readSnpMatrix(filename, MandUnormal = MandUnormal, ReferencePileupFile = ReferencePileupFile, ReferenceLoessFile = ReferenceLoessFile, useMatchedX = useMatchedX, refX = refX)
}else{
facets2n::readSnpMatrix(filename, MandUnormal = MandUnormal)
}
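
With the new `refX` flag the wrapper can ask facets2n to normalize chrX against a sex-matched reference normal rather than the matched normal. A hedged usage sketch based on the signature above; the file paths are placeholders, not files from this repository.

    counts = read_snp_matrix_facets2n(
        filename            = 'tumor_normal.snp_pileup.gz',   # placeholder path
        MandUnormal         = TRUE,
        ReferencePileupFile = 'reference_normals.pileup.gz',  # placeholder path
        ReferenceLoessFile  = 'reference_normals.loess.txt',  # placeholder path
        useMatchedX         = FALSE,
        refX                = TRUE)  # sex-matched reference normal for chrX
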
22 changes: 20 additions & 2 deletions R/run-facets.R
@@ -37,7 +37,6 @@
#' }
#'
#' @import facets
# @import pctGCdata
#' @importFrom pctGCdata getGCpct
#' @import facets2n

@@ -91,7 +90,7 @@ run_facets = function(read_counts,
gbuild = genome, hetscale = TRUE, unmatched = FALSE, ndepthmax = 5000)
}
out = facets2n::procSample(dat, cval = cval, min.nhet = min_nhet, dipLogR = dipLogR)
fit = facets::emcncf(out, min.nhet = min_nhet)
fit = facets2n::emcncf(out, min.nhet = min_nhet)

}else{
dat = facets::preProcSample(read_counts, ndepth = ndepth, het.thresh = 0.25, snp.nbhd = snp_nbhd, cval = 25,
@@ -223,4 +222,23 @@ create_legacy_output <- function(facets_output, directory, sample_id, counts_fil
out_txt = paste0(out_txt, '# ', run$flags,'\n\n')

cat(out_txt, file=paste0(directory, '/', sample_id, '.out'))

#try default facets2n plotting, continue if no facets2n lib
tryCatch({
outfile = paste0(output_prefix, "_legacy.tiff")
plot_title = paste0(basename(output_prefix),
' | cval=', cval,
' | purity_cval=', purity_cval,
' | purity=', round(fit$purity, 2),
' | ploidy=', round(fit$ploidy, 2),
' | dipLogR=', round(fit$dipLogR, 2))
message(plot_title)
tiff(file = outfile, width = 6.5, height = 8, res = 300,units = 'in')
facets2n::plotSample(x = out, emfit = fit, plot.type = "both", sname = plot_title)
dev.off()
},
error= function(cond) {message(cond); return(NA)},
warning=function(cond) {message(cond); return(NA)},
finally = NULL
)
}
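
`create_legacy_output()` now also attempts a facets2n `plotSample()` TIFF, and the `tryCatch()` lets the text output survive if the facets2n library or the plot call fails. A heavily hedged usage sketch follows; the signature shown above is truncated, so additional required arguments may exist, and all values here are placeholders.

    create_legacy_output(facets_output = facets_output,
                         directory     = 'legacy_out',                    # placeholder
                         sample_id     = 'Tumor_Sample1',                 # placeholder
                         counts_file   = 'tumor_normal.snp_pileup.gz')    # placeholder
    # Expected side effects per the code above: a <sample_id>.out text file plus,
    # when facets2n is available, a *_legacy.tiff copy-number plot.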

