Skip to content

Commit

Permalink
Merge pull request #11 from KarchinLab/staging
Browse files Browse the repository at this point in the history
Staging
  • Loading branch information
jiaying2508 authored Jun 6, 2023
2 parents 74f99c0 + d1b8936 commit 6cef8bb
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 23 deletions.
6 changes: 3 additions & 3 deletions R/gabowmyers.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,15 @@ prepareGraphForGabowMyers <- function(w, chains, input_data) {
#' @import tidyr
#' @param w matrix of CCF values (rows = clusters, columns = samples)
#' @return graph_G tibble of possible edges with columns edge, parent, child
prepareGraph <- function(w_mat) {
prepareGraph <- function(w_mat, thresh) {
graph_pre <- data.frame(edge = character(), parent = character(), child = character())
for (i in seq_len(nrow(w_mat))) {
graph_pre <- graph_pre %>% add_row(edge = paste("root->", i, sep = ""), parent = "root", child = as.character(i))
for (j in seq_len(nrow(w_mat))) {
if (i!=j) {
i_row = w_mat[i, ]
j_row = w_mat[j, ]
if (all(j_row[i_row > 0] > 0)) {
if (all(j_row-i_row > -thresh)) {
graph_pre <- graph_pre %>% add_row(edge = paste(j, "->", i, sep = ""), parent = as.character(j), child = as.character(i))
}
}
Expand Down Expand Up @@ -261,7 +261,7 @@ enumerateSpanningTrees <- function(graph_G, w, sum_filter_thresh=0.2) {
generateAllTrees <- function(w, lineage_precedence_thresh=0.1, sum_filter_thresh=0.2) {
w_mat <- estimateCCFs(w)
w_mat <- assign("w_mat", w_mat, envir = .GlobalEnv)
graph_G_pre <- prepareGraph(w_mat)
graph_G_pre <- prepareGraph(w_mat, lineage_precedence_thresh)
graph_G <- filterEdgesBasedOnCCFs(graph_G_pre, w_mat, thresh = lineage_precedence_thresh)
graph_G <- assign("graph_G", graph_G, envir = .GlobalEnv)
enumerateSpanningTreesModified(graph_G, w_mat, sum_filter_thresh = sum_filter_thresh)
Expand Down
39 changes: 21 additions & 18 deletions R/preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#'
#' @export
#' @param input_file input data file;
importCSV <- function(inputFile) {
importCSV <- function(inputFile, alt_reads_thresh = 6, vaf_thresh = 0.02) {
data <- read_csv(inputFile, show_col_types = FALSE)
output_data <- list()

Expand Down Expand Up @@ -36,7 +36,7 @@ importCSV <- function(inputFile) {
output_data$n[output_data$n==0] <- round(mean(output_data$n))
}

output_data$tcn <- as.matrix(data[c("mutation", "sample", "tumor_integer_copy_number")] %>% pivot_wider(names_from = sample, values_from = tumor_integer_copy_number, values_fill = 0))
output_data$tcn <- as.matrix(data[c("mutation", "sample", "tumor_integer_copy_number")] %>% pivot_wider(names_from = sample, values_from = tumor_integer_copy_number, values_fill = 2))
rownames(output_data$tcn) <- output_data$tcn[,'mutation']
output_data$tcn <- output_data$tcn[,-1, drop=FALSE]
rowname = rownames(output_data$tcn)
Expand All @@ -45,29 +45,30 @@ importCSV <- function(inputFile) {
rownames(output_data$tcn) = rowname
colnames(output_data$tcn) = colname

output_data$purity <- as.matrix(data[c("mutation", "sample", "purity")] %>% pivot_wider(names_from = sample, values_from = purity))
output_data$purity <- as.matrix(data[c("mutation", "sample", "purity")] %>% pivot_wider(names_from = sample, values_from = purity, values_fill = 0))
rownames(output_data$purity) <- output_data$purity[,'mutation']
output_data$purity <- output_data$purity[,-1, drop=FALSE]
rowname = rownames(output_data$purity)
colname = colnames(output_data$purity)
output_data$purity <- matrix(as.numeric(output_data$purity), ncol = ncol(output_data$purity))
rownames(output_data$purity) = rowname
colnames(output_data$purity) = colname
if (ncol(output_data$purity) == 1) {
if (length(unique(output_data$purity[,])) != 1) {
warning("purity not consistent for the same sample; taking the mean purity")
output_data$purity <- round(colMeans(output_data$purity), digit=2)
} else {
output_data$purity <- unique(output_data$purity[,])
}
} else {
if (nrow(unique(output_data$purity[,])) != 1) {
warning("Purity not consistent for the same sample; taking the mean purity")
output_data$purity <- round(colMeans(output_data$purity), digit=2)
} else {
output_data$purity <- unique(output_data$purity[,])[1,]
}
}
output_data$purity = colSums(output_data$purity) / colSums(!!output_data$purity)
# if (ncol(output_data$purity) == 1) {
# if (length(unique(output_data$purity[,])) != 1) {
# warning("purity not consistent for the same sample; taking the mean purity")
# output_data$purity <- round(colMeans(output_data$purity), digit=2)
# } else {
# output_data$purity <- unique(output_data$purity[,])
# }
# } else {
# if (nrow(unique(output_data$purity[,])) != 1) {
# warning("Purity not consistent for the same sample; taking the mean purity")
# output_data$purity <- round(colMeans(output_data$purity), digit=2)
# } else {
# output_data$purity <- unique(output_data$purity[,])[1,]
# }
# }

output_data$S = ncol(output_data$y)
output_data$I = nrow(output_data$y)
Expand All @@ -88,6 +89,8 @@ importCSV <- function(inputFile) {
output_data$m <- estimateMultiplicityMatrix(output_data)
}

output_data$y[output_data$y / output_data$n < vaf_thresh] = 0
output_data$y[output_data$y < alt_reads_thresh] = 0
return(output_data)
}

Expand Down
5 changes: 3 additions & 2 deletions vignettes/pictograph.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@ The required user input is a csv file that contains at least columns named "samp
head(read.csv(system.file('extdata/example_input.csv', package = 'pictograph')))
```

The input file can be read using the importCSV function. Alt read count (y), total read count (n), tumor copy number (tcn), and multiplicity (m) are stored in matrices where the columns are samples, and rows are variants. Purity is supplied as a vector. I and S are integers representing the number of variants and number of samples, respectively.
The input file can be read using the importCSV function. Alt read count (y), total read count (n), tumor copy number (tcn), and multiplicity (m) are stored in matrices where the columns are samples, and rows are variants. Purity is supplied as a vector. I and S are integers representing the number of variants and number of samples, respectively. The function will also pre-process the data to remove potential false positive read counts. By default, a mutation with alt reads less than 6 or a VAF less than 2% is considered a false positive. Users can change the thresholds using the `alt_reads_thresh` and `vaf_thresh` parameters.

```{r input_data}
input_data <- importCSV(system.file('extdata/example_input.csv', package = 'pictograph'))
input_data <- importCSV(system.file('extdata/example_input.csv', package = 'pictograph'),
alt_reads_thresh = 6, vaf_thresh = 0.02)
```

```{r load_results, include = F}
Expand Down

0 comments on commit 6cef8bb

Please sign in to comment.