Skip to content

Commit

Permalink
code speedup
Browse files Browse the repository at this point in the history
  • Loading branch information
nicola-calonaci committed Apr 8, 2024
1 parent d70bc55 commit dd6810a
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 139 deletions.
236 changes: 118 additions & 118 deletions .Rhistory
Original file line number Diff line number Diff line change
@@ -1,121 +1,3 @@
any(!is.integer(genomic_data$NV))
any(!is.numeric(genomic_data$VAF))
any(is.na(gene))
any(is.na(genomic_data$\gene))
any(is.na(genomic_data$gene))
any(is.na(genomic_data$NV))
any(is.na(genomic_data$DP))
genomic_data %>% filter(DP<=0)
genomic_data %>% filter(NV<=0)
genomic_data %>% filter(VAF<=0)
genomic_data %>% filter(is.na(gene))
genomic_data %>% filter(!(gene %in% cancer_gene_census$gene))
input = init(
genomic_data = genomic_data %>% filter(!(gene %in% cancer_gene_census$gene)),
clinical_data = x$clinical_data %>% filter(tumor_type == 'CSCC'),
gene_roles = cancer_gene_census
)
source('./scripts/library.R')
x = prepare_msk_data()
library(INCOMMON)
input = init(
genomic_data = genomic_data %>% filter(!(gene %in% cancer_gene_census$gene)),
clinical_data = x$clinical_data %>% filter(tumor_type == 'CSCC'),
gene_roles = cancer_gene_census
)
output = classify(
x = input,
priors = pcawg_priors,
entropy_cutoff = 0.2,
rho = 0.01
)
priors = pcawg_priors
entropy_cutoff = 0.2
rho = 0.01
entropy_cutoff = 1
output = x
output$classification = list()
cli::cli_h1("INCOMMON inference of copy number and mutation multiplicity for sample {.field {x$sample}}")
cat("\n")
check_input(x)
cli::cli_alert_info("Performing classification")
x = idify(x)
x$input = x$input %>% dplyr::mutate(id = paste(sample, chr,
from, to, ref, alt, NV, DP, sep = ":"))
x$input
input = init(
genomic_data = genomic_data %>% filter(!(gene %in% cancer_gene_census$gene)),
clinical_data = x$clinical_data %>% filter(tumor_type == 'CSCC'),
gene_roles = cancer_gene_census
)
input
input = init(
genomic_data = genomic_data,
clinical_data = x$clinical_data %>% filter(tumor_type == 'CSCC'),
gene_roles = cancer_gene_census
)
output = classify(
x = input,
priors = pcawg_priors,
entropy_cutoff = 0.2,
rho = 0.01
)
devtools::install_github('caravagnalab/INCOMMON')
source('./scripts/library.R')
x = prepare_msk_data()
library(INCOMMON)
input = init(
genomic_data = genomic_data,
clinical_data = x$clinical_data %>% filter(tumor_type == 'CSCC'),
gene_roles = cancer_gene_census
)
input = init(
genomic_data = x$genomic_data,
clinical_data = x$clinical_data %>% filter(tumor_type == 'CSCC'),
gene_roles = cancer_gene_census
)
output = classify(
x = input,
priors = pcawg_priors,
entropy_cutoff = 0.2,
rho = 0.01,
parallel = TRUE,
num_cores = 8
)
devtools::install_github("ebesedina/mutmatch")
setwd('~/Documents/GitHub/INCOMMON/')
devtools::load_all()
genomic_data = MSK_genomic_data
clinical_data = MSK_clinical_data
gene_roles = INCOMMON::cancer_gene_census
x = init(genomic_data,
clinical_data,
gene_roles = INCOMMON::cancer_gene_census)
x
x = init(genomic_data %>% dplyr::slice_head(n=10),
clinical_data,
gene_roles = INCOMMON::cancer_gene_census)
x
priors = INCOMMON::pcawg_priors
entropy_cutoff = 0.2
rho = 0.01
parallel = TRUE
num_cores=2
karyotypes = c("1:0", "1:1", "2:0", "2:1", "2:2")
output = x
output$classification = list()
cli::cli_h1(
"INCOMMON inference of copy number and mutation multiplicity for sample {.field {x$sample}}"
)
cat("\n")
check_input(x)
cli::cli_alert_info("Performing classification")
x = idify(x)
output = x
output$classification = list()
cli::cli_h1(
"INCOMMON inference of copy number and mutation multiplicity for sample {.field {x$sample}}"
)
cat("\n")
check_input(x)
cli::cli_alert_info("Performing classification")
Expand Down Expand Up @@ -510,3 +392,121 @@ names(x$classification)
setwd('~/Documents/GitHub/CNAqc/')
CNAqc:::advanced_phasing
INCOMMON::classify
setwd('~/Documents/GitHub/INCOMMON/')
devtools::load_all()
library(INCOMMON)
library(dplyr)
library(DT)
sample = 'P-0002081'
genomic_data = MSK_genomic_data %>% filter(sample == !!sample)
clinical_data = MSK_clinical_data %>% filter(sample == !!sample)
print(genomic_data)
print(clinical_data)
x = init(genomic_data = genomic_data,
clinical_data = clinical_data,
gene_roles = cancer_gene_census)
print(x)
priors = pcawg_priors
entropy_cutoff = 0.2
rho = 0.01
karyotypes = c("1:0", "1:1", "2:0", "2:1", "2:2")
stopifnot(inherits(x, "INCOMMON"))
if(is.null(entropy_cutoff)) entropy_cutoff = 1
# Output
output = x
output$classification = list()
cli::cli_h1(
"INCOMMON inference of copy number and mutation multiplicity for sample {.field {x$sample}}"
)
cat("\n")
check_input(x)
cli::cli_alert_info("Performing classification")
x = idify(x)
classify_single_mutation = function(x, id){
# Control for duplicates
if(info(x, id) %>% nrow() > 1){
cli_alert_warning(text = "More than one mutation mapped at: {.field {id}}")
info(x, id)
cli_alert_warning(text = "Keeping first row by default (check your input data)")
w = which(ids(x)==id)
x$data = x$data[-w[2:length(w)],]
}
# Compute model likelihood, posterior and entropy
posterior = compute_posterior(
NV = NV(x, id),
DP = DP(x, id),
gene = gene(x, id),
priors = priors,
tumor_type = tumor_type(x, id),
purity = purity(x, id),
entropy_cutoff = entropy_cutoff,
rho = rho,
karyotypes = karyotypes
)
# Maximum a posteriori classification
map = posterior %>%
dplyr::filter(NV == NV(x, id)) %>%
dplyr::filter(value == max(value)) %>%
dplyr::ungroup()
if (nrow(map) > 1) {
cli_alert_warning(text =
"With purity {.field {purity(x, id)}} karyotype {.field {map$karyotype}} with multiplicities {.field {map$multiplicity}} have the same likelihood")
cli_alert_warning(text =
"Simplest case will be selected: {.field {map$karyotype[1]}}")
map = map[1., ]
}
map = map %>% dplyr::select(label, state, value, entropy) %>% dplyr::rename(posterior = value) %>% dplyr::mutate(id = id)
fit = dplyr::right_join(input(x) %>% dplyr::select(colnames(genomic_data(x, PASS = TRUE)), id), map, by = 'id')
return(fit)
}
if(parallel){
if(is.null(num_cores)) num_cores = as.integer(0.8*parallel::detectCores())
tests = parallel::mclapply(X = ids(x),
FUN = classify_single_mutation,
x = x,
mc.cores = num_cores)
tests = do.call(rbind, tests)
} else {
tests = lapply(ids(x), function(id) {
classify_single_mutation(x = x, id = id)
})
}
tests = parallel::mclapply(X = ids(x),
FUN = classify_single_mutation,
x = x,
mc.cores = num_cores)
num_cores=2
tests = parallel::mclapply(X = ids(x),
FUN = classify_single_mutation,
x = x,
mc.cores = num_cores)
tests
tests = do.call(rbind, tests)
tests
lapply(ids(x), function(id) {
classify_single_mutation(x = x, id = id)
})
parallel::mclapply(X = ids(x),
FUN = classify_single_mutation,
x = x,
mc.cores = num_cores)
tests
tests = parallel::mclapply(X = ids(x),
FUN = classify_single_mutation,
x = x,
mc.cores = num_cores)
tests
tests %>% do.call(rbind, .)
output$classification$fit = tests %>% do.call(rbind, .)
output$classification$parameters = dplyr::tibble(entropy_cutoff = entropy_cutoff,
rho = rho,
karyotypes = list(karyotypes))
output$classification$priors = priors
classification(output)
classification(output) %>% dplyr::filter(state == !!state)
classification(output) %>% dplyr::filter(state == !!state) %>% nrow()
cli::cli_alert_info('There are: ')
for (state in c('HMD', 'LOH', 'CNLOH', 'AM', 'Tier-2')) {
N = classification(output) %>% dplyr::filter(state == !!state) %>% nrow()
cli::cli_bullets(c("*" = paste0("N = ", N, ' mutations (', state, ')')))
}
30 changes: 15 additions & 15 deletions R/classify.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,6 @@ classify = function(x,
stopifnot(inherits(x, "INCOMMON"))
if(is.null(entropy_cutoff)) entropy_cutoff = 1

# Output

output = x

output$classification = list()

cli::cli_h1(
"INCOMMON inference of copy number and mutation multiplicity for sample {.field {x$sample}}"
)
Expand Down Expand Up @@ -110,21 +104,27 @@ classify = function(x,

}

output$classification$fit = tests %>% do.call(rbind, .)
output$classification$parameters = dplyr::tibble(entropy_cutoff = entropy_cutoff,
rho = rho,
karyotypes = list(karyotypes))
output$classification$priors = priors
# Output

x$classification = list()

x$classification$fit = tests %>% do.call(rbind, .)
x$classification$parameters = dplyr::tibble(
entropy_cutoff = entropy_cutoff,
rho = rho,
karyotypes = list(karyotypes)
)
x$classification$priors = priors

cli::cli_alert_info('There are: ')
for (state in c('HMD', 'LOH', 'CNLOH', 'AM', 'Tier-2')) {
N = classification(output) %>% dplyr::filter(state == !!state) %>% nrow()
N = classification(x) %>% dplyr::filter(state == !!state) %>% nrow()
cli::cli_bullets(c("*" = paste0("N = ", N, ' mutations (', state, ')')))
}

mean_ent = classification(output) %>% dplyr::pull(entropy) %>% mean()
min_ent = classification(output) %>% dplyr::pull(entropy) %>% min()
max_ent = classification(output) %>% dplyr::pull(entropy) %>% max()
mean_ent = classification(x) %>% dplyr::pull(entropy) %>% mean()
min_ent = classification(x) %>% dplyr::pull(entropy) %>% min()
max_ent = classification(x) %>% dplyr::pull(entropy) %>% max()

cli::cli_alert_info(
'The mean classification entropy is {.field {round(mean_ent, 2)}} (min: {.field {round(min_ent, 2)}}, max: {.field {round(max_ent, 2)}})'
Expand Down
12 changes: 6 additions & 6 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,21 +132,21 @@ info = function(x, id){


DP = function(x, id){
x = idify(x)
if(!("id" %in% colnames(x$input))) x = idify(x)
x$input %>%
dplyr::filter(id == !!id) %>%
dplyr::pull(DP)
}

NV = function(x, id){
x = idify(x)
if(!("id" %in% colnames(x$input))) x = idify(x)
x$input %>%
dplyr::filter(id == !!id) %>%
dplyr::pull(NV)
}

entropy = function(x, id){
x = idify(x)
if(!("id" %in% colnames(x$input))) x = idify(x)
posterior(x, id) %>%
dplyr::filter(NV == NV(x, id)) %>%
dplyr::arrange(dplyr::desc(value)) %>%
Expand All @@ -156,21 +156,21 @@ entropy = function(x, id){


VAF = function(x, id){
x = idify(x)
if(!("id" %in% colnames(x$input))) x = idify(x)
x$input %>%
dplyr::filter(id == !!id) %>%
dplyr::pull(VAF)
}

gene = function(x, id){
x = idify(x)
if(!("id" %in% colnames(x$input))) x = idify(x)
x$input %>%
dplyr::filter(id == !!id) %>%
dplyr::pull(gene)
}

get_gene_role = function(x, id){
x = idify(x)
if(!("id" %in% colnames(x$input))) x = idify(x)
x$data %>%
dplyr::filter(id == !!id) %>%
dplyr::pull(gene_role)
Expand Down

0 comments on commit dd6810a

Please sign in to comment.