Skip to content

Commit

Permalink
Merge pull request #1 from taylor-lab/feature/facets2N
Browse files Browse the repository at this point in the history
Feature/facets2 n
  • Loading branch information
rptashkin authored Mar 2, 2020
2 parents 5e9c401 + 3a889e9 commit 4f2f8ae
Show file tree
Hide file tree
Showing 21 changed files with 529 additions and 144 deletions.
12 changes: 7 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
Package: facetsSuite
Type: Package
Title: Functions to run and parse output from FACETS
Version: 2.0.0
Version: 2.0.4
Authors@R: person(given = "Philip",
family = "Jonsson",
role = c("aut", "cre"),
email = "[email protected]")
URL: https://github.com/mskcc/facets-suite
Description: This package provides functions that wrap the FACETS package for allele-specific copy-number analysis as well as functions to perform post-hoc analyses thereof.
License: MIT + file LICENSE
RoxygenNote: 6.1.1
RoxygenNote: 7.0.2
LazyData: true
Encoding: UTF-8
Depends:
R (>= 2.10)
R (>= 3.4.0), pctGCdata (>= 0.3.0)
Imports:
data.table (>= 1.11.8),
dplyr (>= 0.8.3),
Expand All @@ -25,9 +25,11 @@ Imports:
egg (>= 0.4.2),
scales (>= 1.0.0),
argparse (>= 1.1.1),
tibble (>= 2.1.3)
tibble (>= 2.1.3),
facets (>= 0.5.6),
facets2n (>= 0.2.2)
NeedsCompilation: no
Packaged: 2019-02-07 16:38:38 UTC; jonssonp
Packaged: 2020-03-01 21:56:37 UTC; rptashkin
Suggests:
covr (>= 3.2.0),
testthat (>= 2.1.0)
9 changes: 7 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,19 @@ export(cf_plot)
export(check_fit)
export(closeup_plot)
export(cnlr_plot)
export(create_legacy_output)
export(format_igv_seg)
export(gene_level_changes)
export(icn_plot)
export(load_facets_output)
export(plot_segmentation)
export(read_snp_matrix)
export(read_snp_matrix_facets2n)
export(run_facets)
export(create_legacy_output)
export(load_facets_output)
export(valor_plot)
import(data.table)
import(facets)
import(facets2n)
import(ggplot2)
importFrom(data.table,":=")
importFrom(data.table,foverlaps)
Expand All @@ -45,6 +48,7 @@ importFrom(dplyr,summarize)
importFrom(dplyr,ungroup)
importFrom(egg,ggarrange)
importFrom(grDevices,colorRampPalette)
importFrom(pctGCdata,getGCpct)
importFrom(plyr,mapvalues)
importFrom(purrr,discard)
importFrom(purrr,map_chr)
Expand All @@ -55,3 +59,4 @@ importFrom(scales,pretty_breaks)
importFrom(stats,dbinom)
importFrom(tidyr,gather)
importFrom(tidyr,separate_rows)
importFrom(utils,read.csv)
4 changes: 2 additions & 2 deletions R/ccf-annotate-maf.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@ ccf_annotate_maf = function(maf,
maf[, c('ccf_Mcopies', 'ccf_Mcopies_lower', 'ccf_Mcopies_upper', 'ccf_Mcopies_prob95', 'ccf_Mcopies_prob90') :=
estimate_ccf(purity, tcn, tcn - lcn, t_alt_count, t_depth), by = seq_len(nrow(maf))]
maf[, c('ccf_1copy', 'ccf_1copy_lower', 'ccf_1copy_upper', 'ccf_1copy_prob95', 'ccf_1copy_prob90') :=
estimate_ccf(purity, tcn, tcn - lcn, t_alt_count, t_depth), by = seq_len(nrow(maf))]
estimate_ccf(purity, tcn, 1, t_alt_count, t_depth), by = seq_len(nrow(maf))]
maf[, c('ccf_expected_copies', 'ccf_expected_copies_lower', 'ccf_expected_copies_upper',
'ccf_expected_copies_prob95', 'ccf_expected_copies_prob90') :=
estimate_ccf(purity, tcn, tcn - lcn, t_alt_count, t_depth), by = seq_len(nrow(maf))]
estimate_ccf(purity, tcn, expected_alt_copies, t_alt_count, t_depth), by = seq_len(nrow(maf))]
as.data.frame(maf)
}

Expand Down
75 changes: 61 additions & 14 deletions R/check-fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ check_fit = function(facets_output,
dipLogR = facets_output$dipLogR
alballogr = as.numeric(facets_output$alballogr[, 1])
purity = facets_output$purity
mafr_thresh = facets_output$mafr_thresh

fcna_output = calculate_fraction_cna(segs, facets_output$ploidy, genome, algorithm)
wgd = fcna_output$genome_doubled

Expand All @@ -66,9 +66,9 @@ check_fit = function(facets_output,
setkey(segs, chrom, start, end)
snps = as.data.table(snps)
setkey(snps, chrom, maploc)

# Label clonal segments
segs[, clonal := cf >= purity - .1]
segs[, clonal := cf >= (purity * 0.8)]

# Subset on autosomes
auto_segs = segs[chrom < 23]
Expand All @@ -92,6 +92,34 @@ check_fit = function(facets_output,
n_dip_imbal_segs = nrow(dip_imbal_segs)
frac_dip_imbal_segs = sum(dip_imbal_segs$length)/sum(auto_segs$length)

#############################
### Other metrics
#############################
## fraction genome unaltered (2-1) -- too high --> low purity?
segs_unaltered = auto_segs[tcn == 2 & lcn == 1, ]
n_segs_unaltered = nrow(segs_unaltered)
frac_genome_unaltered = sum(segs_unaltered$length)/sum(auto_segs$length)

segs_below_dipLogR = auto_segs[cnlr.median.clust < dipLogR] # part of ploidy filter
n_segs_below_dipLogR = nrow(segs_below_dipLogR) # part of ploidy filter
frac_below_dipLogR = sum(segs_below_dipLogR$length)/sum(auto_segs$length)

segs_balanced_odd_tcn = auto_segs[mafR <= 0.025 & (tcn %% 2) != 0 & clonal ==T & tcn < 8,]
n_segs_balanced_odd_tcn = nrow(segs_balanced_odd_tcn)
frac_balanced_odd_tcn = sum(segs_balanced_odd_tcn$length)/sum(auto_segs$length)

segs_imbalanced_diploid_cn = auto_segs[clonal==T & mafR > 0.1 & !is.na(lcn) & lcn != 0 & (as.double(tcn) / lcn) == 2, ]
n_segs_imbalanced_diploid_cn = nrow(segs_imbalanced_diploid_cn)
frac_imbalanced_diploid_cn = sum(segs_imbalanced_diploid_cn$length)/sum(auto_segs$length)

segs_lcn_greater_mcn = auto_segs[clonal==T & lcn < mcn,]
n_segs_lcn_greater_mcn = nrow(segs_lcn_greater_mcn)
frac_lcn_greater_mcn = sum(segs_lcn_greater_mcn$length)/sum(auto_segs$length)

mafr_median_all = median(auto_segs$mafR, na.rm = T)
mafr_median_clonal = median(auto_segs[clonal==T, ]$mafR, na.rm = T)
mafr_n_gt_1 = nrow(auto_segs[mafR > 1, ])

# Number of high-level amplifications and homozygous deletions
# Clonal homdels, how much of the genome do they represent
n_amps = nrow(segs[tcn >= 10])
Expand All @@ -110,28 +138,32 @@ check_fit = function(facets_output,
n_cnlr_clusters = length(unique(segs$cnlr.median.clust))

# Number of segments with lcn==NA
n_lcn_na = nrow(segs[is.na(lcn)])
n_lcn_na = nrow(auto_segs[is.na(lcn)])
frac_lcn_na = sum(auto_segs[is.na(lcn)]$length)/sum(auto_segs$length)

# Number of segments with LOH
loh_segs = auto_segs[lcn == 0,]
n_loh = nrow(loh_segs)
frac_loh = sum(loh_segs$length)/sum(auto_segs$length)

# Check fraction of subclonal events
subclonal_segs = segs[clonal == F, ]
# Check fraction of subclonal events within autosomal chromosomes
subclonal_segs = auto_segs[clonal == F, ]
n_segs_subclonal = nrow(subclonal_segs)
frac_segs_subclonal = sum(subclonal_segs$length)/sum(segs$length)

frac_segs_subclonal = sum(subclonal_segs$length)/sum(auto_segs$length)
# Compile SNP statistics
n_snps = nrow(snps)
het_snps = snps[het == 1]
n_het_snps = nrow(het_snps)
frac_het_snps = n_het_snps/n_snps
n_het_snps_hom_in_tumor_1pct = nrow(het_snps[ vafT < 0.01 | vafT > 0.99, ])
n_het_snps_hom_in_tumor_5pct = nrow(het_snps[ vafT < 0.05 | vafT > 0.95, ])

n_snps_with_300x_in_tumor = nrow(snps[rCountT > 300, ])
n_het_snps_with_300x_in_tumor = nrow(het_snps[rCountT > 300, ])
n_het_snps_hom_in_tumor_1pct = nrow(het_snps[ (vafT < 0.01 | vafT > 0.99), ])
n_het_snps_hom_in_tumor_5pct = nrow(het_snps[ (rCountN > 35 & rCountT > 35) & (vafT < 0.05 | vafT > 0.95), ])
frac_het_snps_hom_in_tumor_1pct = n_het_snps_hom_in_tumor_1pct/n_het_snps
frac_het_snps_hom_in_tumor_5pct = n_het_snps_hom_in_tumor_5pct/n_het_snps

# Check the mean/standard deviation of the cnlr
snps[, cnlr_residual := cnlr - median(cnlr), by = seg]
mean_cnlr_residual = mean(snps$cnlr_residual)
Expand Down Expand Up @@ -191,9 +223,19 @@ check_fit = function(facets_output,
frac_loh = frac_loh,
n_segs_subclonal = n_segs_subclonal,
frac_segs_subclonal = frac_segs_subclonal,
n_segs_below_dipLogR = n_segs_below_dipLogR,
frac_below_dipLogR = frac_below_dipLogR,
n_segs_balanced_odd_tcn = n_segs_balanced_odd_tcn,
frac_balanced_odd_tcn = frac_balanced_odd_tcn,
n_segs_imbalanced_diploid_cn = n_segs_imbalanced_diploid_cn,
frac_imbalanced_diploid_cn = frac_imbalanced_diploid_cn,
n_segs_lcn_greater_mcn = n_segs_lcn_greater_mcn,
frac_lcn_greater_mcn = frac_lcn_greater_mcn,
n_snps = n_snps,
n_het_snps = n_het_snps,
frac_het_snps = frac_het_snps,
n_snps_with_300x_in_tumor = n_snps_with_300x_in_tumor,
n_het_snps_with_300x_in_tumor = n_het_snps_with_300x_in_tumor,
n_het_snps_hom_in_tumor_1pct = n_het_snps_hom_in_tumor_1pct,
n_het_snps_hom_in_tumor_5pct = n_het_snps_hom_in_tumor_5pct,
frac_het_snps_hom_in_tumor_1pct = frac_het_snps_hom_in_tumor_1pct,
Expand All @@ -207,22 +249,27 @@ check_fit = function(facets_output,
n_segs_discordant_both = discordant_stats$n_discordant_both,
frac_discordant_both = discordant_stats$length_discordant_both/evaluable_length$both,
n_segs_icn_cnlor_discordant = n_icn_cnlor_discordant,
frac_icn_cnlor_discordant = frac_icn_cnlor_discordant
frac_icn_cnlor_discordant = frac_icn_cnlor_discordant,
mafr_median_all = mafr_median_all,
mafr_median_clonal = mafr_median_clonal,
mafr_n_gt_1 = mafr_n_gt_1
)



# If input MAF is provided add some stats based on this
if (!is.null(maf)) {
maf = as.data.table(maf)

maf[, `:=` (
Chromosome = ifelse(Chromosome == 'X', 23, Chromosome),
t_var_freq = t_alt_count/(t_alt_count+t_ref_count)
)][, Chromosome := as.integer(Chromosome)]
)][, Chromosome := as.integer(Chromosome)]
setkey(maf, Chromosome, Start_Position, End_Position)
maf = foverlaps(maf, segs, mult = 'first', nomatch = NA,
by.x = c('Chromosome', 'Start_Position', 'End_Position'),
by.y = c('chrom', 'start', 'end'))

# Median mutation VAF at clonal 2:1 segments
output$dip_median_vaf = maf[clonal == TRUE & mcn == 1 & lcn == 1][, median(t_var_freq)]

Expand Down
10 changes: 6 additions & 4 deletions R/plot-facets.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ cnlr_plot = function(facets_data,

ymin = floor(min(segs$cnlr.median, na.rm = T))
ymax = ceiling(max(segs$cnlr.median, na.rm = T))
if (ymin > -3) ymin = -3
if (ymax < 3) ymax = 3
if (ymin > -2) ymin = -2
if (ymax < 2) ymax = 2

if (adjust_dipLogR) {
snps$cnlr = snps$cnlr - dipLogR
Expand Down Expand Up @@ -240,9 +240,11 @@ cf_plot = function(facets_data,

starts = cumsum(c(1, segs$num.mark))[seq_along(segs$num.mark)]
ends = cumsum(c(segs$num.mark))
my_starts = snps[starts, 'chr_maploc']
my_ends = snps[ends, 'chr_maploc']

cf = ggplot(segs) +
geom_rect(aes(xmin = starts, xmax = ends, ymax = 1, ymin = 0),
geom_rect(aes(xmin = my_starts, xmax = my_ends, ymax = 1, ymin = 0),
fill = cols, col = 'white', size = 0) +
scale_x_continuous(breaks = mid, labels = names(mid), expand = c(.01, 0)) +
scale_y_continuous(expand = c(0, 0)) +
Expand Down Expand Up @@ -315,7 +317,7 @@ icn_plot = function(facets_data,
geom_segment(col = 'red', size = 1,
aes(x = my_lcn_starts$chr_maploc, xend = my_lcn_ends$chr_maploc,
y = my_lcn_starts$lcn, yend = my_lcn_ends$lcn)) +
geom_segment(col = 'black', size = 1,
geom_segment(col = 'black', size = 0.8,
aes(x = my_tcn_starts$chr_maploc, xend = my_tcn_ends$chr_maploc,
y = my_tcn_starts$tcn, yend = my_tcn_ends$tcn)) +
# scale_y_continuous(breaks=c(0:5, 5 + seq_len(35) / 3), labels = 0:40, limits = c(0, NA)) +
Expand Down
71 changes: 50 additions & 21 deletions R/read-snp-matrix.R
Original file line number Diff line number Diff line change
@@ -1,28 +1,26 @@
#' Read counts file
#'
#' Reads a gzipped output file from \code{snp-pileup}.
#'
#' @param input_file Path to input file.
#' @param err.thresh Threshold for errors at locus.
#' @param del.thresh Threshold for deletions at locus.
#'
#' @source \code{snp-pileup} is part of \href{www.github.com/mskcc/facets}{FACETS}.
#'
#' @return A SNP count matrix, with the following columns:
#' \itemize{
#' \item{\code{Chromosome} and \code{Position}:} {Chromosomal position of SNP.}
#' \item{\code{NOR.DP}:} {Total read depth in normal sample.}
#' \item{\code{TUM.DP}:} {Total read depth in tumor sample.}
#' \item{\code{NOR.RD}:} {Reference allele read depth in normal sample.}
#' \item{\code{TUM.RD}:} {Reference allele read depth in tumor sample.}
#' }
#'
#' @import data.table

#' @export
read_snp_matrix = function(input_file,
err.thresh = 10,
del.thresh = 10) {
#' Reads a gzipped output file from \code{snp-pileup}.
#'
#' @param input_file Path to input file.
#' @param err.thresh Threshold for errors at locus.
#' @param del.thresh Threshold for deletions at locus.
#'
#' @source \code{snp-pileup} is part of \href{www.github.com/mskcc/facets}{FACETS}.
#'
#' @return A SNP count matrix, with the following columns:
#' \itemize{
#' \item{\code{Chromosome} and \code{Position}:} {Chromosomal position of SNP.}
#' \item{\code{NOR.DP}:} {Total read depth in normal sample.}
#' \item{\code{TUM.DP}:} {Total read depth in tumor sample.}
#' \item{\code{NOR.RD}:} {Reference allele read depth in normal sample.}
#' \item{\code{TUM.RD}:} {Reference allele read depth in tumor sample.}
#' }
#'
#' @import data.table
#' @export

read_counts = data.table::fread(cmd = paste('gunzip -c', input_file), key = c('Chromosome', 'Position'))

Expand All @@ -44,3 +42,34 @@ read_snp_matrix = function(input_file,

read_counts[order(Chromosome, Position)][, list(Chromosome, Position, NOR.DP, TUM.DP, NOR.RD, TUM.RD)]
}

read_snp_matrix_facets2n = function(filename, err.thresh=Inf, del.thresh=Inf, perl.pileup=FALSE,
MandUnormal=FALSE, spanT=0.2, spanA=0.2, spanX=0.2, gbuild="hg19",
ReferencePileupFile=NULL, ReferenceLoessFile=NULL, MinOverlap=0.9, useMatchedX=FALSE){
#' #' Read in the snp-pileup generated SNP read count matrix file for facets2n processing
#' @importFrom utils read.csv
#' @param filename counts file from snp-pileup
#' @param skip (character) Skip n number of lines in the input file.
#' @param err.thresh (numeric) Error threshold to be used to filter snp-pileup data frame.
#' @param del.thresh (numeric) Deletion threshold to be used to filter snp-pileup data frame.
#' @param perl.pileup (logical) Is the pileup data generated using perl pileup tool?
#' @param MandUnormal (logical) Is CNLR analysis to be peformed using unmatched reference normals?
#' @param spanT (numeric) Default span value to be used for loess normalization in tumor sample.
#' @param spanA (numeric) Default span value to be used for loess normalization across autosomal chromosomes in the normal sample.
#' @param spanX (numeric) Default span value to be used for loess normalization in Chr X in the normal sample.
#' @param gbuild (character) Genome build (Default: hg19).
#' @param ReferencePileupFile (character) Filepath to an optional snp-pileup generated pileup data of one or more reference normals.
#' @param ReferenceLoessFile (character) Filepath to an optional loess data, generated using the facets2n package, of one or more reference normals. The number of normals in this data should match that in the ReferencePileupFile, and should be in the same order.
#' @param MinOverlap (numeric) Mininum overlap fraction of loci between a tumor pileup and reference pileup data.
#' @param useMatchedX (logical) Is the matched normal to be used for ChrX normalization?
#' @return A dataframe of pileup depth values for Tumor and Matched Normal if MandUnormal is FALSE. Else, a list of data frame with pileup depth values of Tumor, matched Normal, and a best unmatched normal, and the associated span values.
#' @source \code{snp-pileup} is part of \href{www.github.com/mskcc/facets}{FACETS}.
#' @import facets2n
#' @importFrom pctGCdata getGCpct
#' @export
if (MandUnormal){
facets2n::readSnpMatrix(filename, MandUnormal = MandUnormal, ReferencePileupFile = ReferencePileupFile, ReferenceLoessFile = ReferenceLoessFile, useMatchedX = useMatchedX)
}else{
facets2n::readSnpMatrix(filename, MandUnormal = MandUnormal)
}
}
Loading

0 comments on commit 4f2f8ae

Please sign in to comment.