Skip to content

Commit

Permalink
Added plotly visuals + reworked log2FC calc + expanded pathway file
Browse files Browse the repository at this point in the history
  • Loading branch information
luisherfurth committed May 14, 2024
1 parent cd255c5 commit 7ac007b
Show file tree
Hide file tree
Showing 14 changed files with 771 additions and 118 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@
^R/do_anova_2w\.R$
^R/plot_2w_anvova\.R$
^R/two_way_anova\.R$
^.*\.Rproj$
^\.Rproj\.user$
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,17 @@ Imports:
ggrepel,
utils,
rlang,
methods,
data.table,
S4Vectors,
qvalue
viridis,
viridisLite,
plotly,
limma
Description: The 'MetAlyzer' S4 object provides methods to read and reformat metabolomics data for convenient data handling, statistics and downstream analysis. The resulting format corresponds to input data of the Shiny app 'MetaboExtract' (<https://www.metaboextract.shiny.dkfz.de/MetaboExtract/>).
License: GPL-3
Encoding: UTF-8
Language: en-US
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Suggests:
rmarkdown,
knitr
Expand Down
13 changes: 12 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ export(metalyzer_colors)
export(pathway)
export(plot_log2FC)
export(plot_network)
export(plotly_network)
export(plotly_scatter)
export(plotly_vulcano)
export(polarity)
export(renameMetaData)
export(summarizeConcValues)
Expand All @@ -25,9 +28,17 @@ import(SummarizedExperiment)
import(dplyr)
import(ggplot2)
import(ggrepel)
import(limma)
import(viridis)
import(viridisLite)
importFrom(data.table,":=")
importFrom(plotly,add_annotations)
importFrom(plotly,ggplotly)
importFrom(plotly,layout)
importFrom(plotly,plot_ly)
importFrom(rlang,.data)
importFrom(stats,aov)
importFrom(stats,lm)
importFrom(stats,model.matrix)
importFrom(stats,na.omit)
importFrom(tibble,deframe)
importFrom(tibble,rownames_to_column)
2 changes: 1 addition & 1 deletion R/MetAlyzer_fpaths.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,5 @@ polarity <- function() {
#' @examples
#' fpath <- pathway()
pathway <- function() {
system.file("extdata", "pathway.xlsx", package = "MetAlyzer")
system.file("extdata", "Pathway_130524.xlsx", package = "MetAlyzer")
}
139 changes: 46 additions & 93 deletions R/calculate_log2FC.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' @title Calculate log2 fold change
#'
#' @description This function calculates the log2 fold change of two groups from
#' plotting_data.
#' @description This function calculates log2(FC), p-values, and adjusted p-values
#' of the data using limma.
#' @param metalyzer_se A Metalyzer object
#' @param categorical A column specifying the two groups
#' @param impute_perc_of_min A numeric value below 1
Expand All @@ -11,8 +11,10 @@
#'
#' @import dplyr
#' @import SummarizedExperiment
#' @import limma
#' @importFrom rlang .data
#' @importFrom data.table :=
#' @importFrom stats model.matrix
#' @export
#'
#' @examples
Expand All @@ -36,19 +38,6 @@ calculate_log2FC <- function(metalyzer_se,
categorical,
impute_perc_of_min = 0.2,
impute_NA = FALSE) {
## Check for qvalue and BiocManager installation
# installed_packages <- utils::installed.packages()[, "Package"]
# if (! "qvalue" %in% installed_packages) {
# if (! "BiocManager" %in% installed_packages) {
# cat("Info: Installing package 'BiocManager':\n")
# utils::install.packages("BiocManager")
# cat("\n")
# }
# cat("Info: Installing package 'qvalue':\n")
# BiocManager::install("qvalue", ask = FALSE, type = "binary")
# cat("\n")
# }


## Create a new dataframe to calculate the log2FC
if (!categorical %in% colnames(metalyzer_se@metadata$aggregated_data)) {
Expand All @@ -59,86 +48,50 @@ calculate_log2FC <- function(metalyzer_se,
metalyzer_se <- data_imputation(metalyzer_se, impute_perc_of_min, impute_NA)
metalyzer_se <- data_transformation(metalyzer_se)

df <- metalyzer_se@metadata$aggregated_data %>%
ungroup(all_of(categorical)) %>%
mutate(Value = .data$log2_Conc,
Group = !!sym(categorical)) %>%
select(.data$Metabolite,
.data$Class,
.data$Group,
.data$Value)
aggregated_data <- metalyzer_se@metadata$aggregated_data %>%
dplyr::mutate(LogConcentration = log2(.data$Concentration))
# Prepare abundance data
feat_data <- dplyr::ungroup(aggregated_data) %>%
dplyr::select(.data$Metabolite, .data$ID, .data$LogConcentration) %>%
tidyr::pivot_wider(names_from = 'Metabolite', values_from = 'LogConcentration')

## Check if already factor
group_vec <- df$Group
if (!methods::is(group_vec, "factor")) {
group_vec <- factor(group_vec)
df$Group <- group_vec
cat("Warning: No order was given for categorical!\n")
} else {
group_vec <- droplevels(group_vec)
}
group_levels <- levels(group_vec)
if (length(group_levels) > 2) {
cat("Warning: More than two levels were given! Dropping",
paste(group_levels[3:length(group_levels)], collapse = ", "), "\n")
}
cat("Info: Calculating log2 fold change from ", group_levels[1], " to ",
group_levels[2], " (column: ", categorical, ").\n", sep = "")
df <- filter(df, .data$Group %in% group_levels[1:2])
df$Group <- droplevels(df$Group)
# Prepare sample metadata of interest
smp_metadata <- colData(metalyzer_se) %>%
tibble::as_tibble(rownames = 'ID')

## Check for further grouping
grouping_vars <- as.character(groups(df))
# Use original column names whose spaces are not replaced with '.'
colnames(smp_metadata) <- c('ID', colnames(colData(metalyzer_se)))
smp_metadata <- dplyr::select(smp_metadata, .data$ID, all_of(categorical))

if (!"Metabolite" %in% grouping_vars) {
grouping_vars[length(grouping_vars)+1] <- "Metabolite"
}

## Calculate log2FC and p-val
cat("Info: Calculating log2 fold change groupwise (",
paste(grouping_vars, collapse = " * "),
") using a linear model... ", sep = "")
options(warn = -1)
change_df <- df %>%
group_modify(~ apply_linear_model(df = .x)) %>%
ungroup(.data$Metabolite) %>%
mutate(qval = qvalue::qvalue(.data$pval, pi0 = 1)$qvalues)
options(warn = 0)
cat("finished!\n")

metalyzer_se@metadata$log2FC <- change_df
return(metalyzer_se)
}
# Combine abundance data and sample metadata to ensure matched information
combined_data <- dplyr::left_join(feat_data, smp_metadata, by = 'ID')

#' @title Calculate log2 fold change
#'
#' @description This function applies a linear model to calculate the log2 fold change and
#' its significance
#' @param df A subset data frame
#' @param ...
#'
#' @import dplyr
#' @importFrom stats lm
#' @importFrom rlang .data
#'
#' @keywords internal
apply_linear_model <- function(df, ...) {
class <- df$Class[1]
df <- df %>%
filter(!is.na(.data$Value)) %>%
droplevels()
if (length(levels(df$Group)) != 2) {
l2fc <- NA
pval <- NA
} else {
fit1 <- stats::lm(Value ~ Group, data = df)
l2fc <- fit1$coefficients[2]
fit_dim <- dim(summary(fit1)$coefficients)
pval <- summary(fit1)$coefficients[fit_dim[1],fit_dim[2]]
}
output_df <- data.frame(Class = class,
log2FC = l2fc,
pval = pval,
row.names = NULL)
return(output_df)
# Retrieve data matrix and sample metadata from combined data to conduct limma
data_mat <- combined_data[, seq_len(ncol(feat_data))] %>%
tibble::column_to_rownames('ID') %>%
t()
group_vec <- combined_data[, ncol(feat_data)+1, drop = T]

# Sanity check if specified categorical can split data into two groups
if (length(unique(group_vec)) != 2) {
stop("The specified categorical cannot split data into two groups.")
}

## Compute log2(FC), p-values, and adjusted p-values using limma
design <- model.matrix(~ group_vec)
fit <- limma::lmFit(data_mat, design = design)
fit <- limma::eBayes(fit)
log2FCRes <- limma::topTable(fit, coef = 2, number = Inf) %>%
tibble::rownames_to_column('Metabolite') %>%
dplyr::select(.data$Metabolite, .data$logFC, .data$P.Value, .data$adj.P.Val) %>%
dplyr::rename(log2FC = .data$logFC, pval = .data$P.Value, qval = .data$adj.P.Val)
# Combined all information into a table
group_info <- combined_data[, c(1, ncol(feat_data)+1)]
log2FCTab <- dplyr::left_join(aggregated_data, group_info, by = 'ID') %>%
dplyr::left_join(log2FCRes, by = 'Metabolite') %>%
dplyr::select(.data$Metabolite, .data$Class, .data$log2FC, .data$pval, .data$qval) %>%
dplyr::distinct(.data$Metabolite, .keep_all = TRUE)
metalyzer_se@metadata$log2FC <- log2FCTab

return(metalyzer_se)
}
Loading

0 comments on commit 7ac007b

Please sign in to comment.