From 6c2861438f0d36908d02b8de8e58ed0041559c91 Mon Sep 17 00:00:00 2001 From: Nils Mechtel Date: Sun, 30 Jan 2022 17:53:20 +0100 Subject: [PATCH] last changes for resubmission --- DESCRIPTION | 2 +- doc/MetAlyzer_User_Guide.R | 92 -------- doc/MetAlyzer_User_Guide.Rmd | 177 -------------- doc/MetAlyzer_User_Guide.html | 418 ---------------------------------- 4 files changed, 1 insertion(+), 688 deletions(-) delete mode 100644 doc/MetAlyzer_User_Guide.R delete mode 100755 doc/MetAlyzer_User_Guide.Rmd delete mode 100644 doc/MetAlyzer_User_Guide.html diff --git a/DESCRIPTION b/DESCRIPTION index d0fd18a..5b38c72 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,7 @@ Authors@R: email = "carolinandresen@googlemail.com", role = c("aut"), comment = c(ORCID = "0000-0002-8960-7719")), - person("Daniel", "Hübschmann", + person("Daniel", "Huebschmann", email = "d.huebschmann@dkfz-heidelberg.de", role = c("aut"), comment = c(ORCID = "0000-0002-6041-7049"))) diff --git a/doc/MetAlyzer_User_Guide.R b/doc/MetAlyzer_User_Guide.R deleted file mode 100644 index b3eaaa0..0000000 --- a/doc/MetAlyzer_User_Guide.R +++ /dev/null @@ -1,92 +0,0 @@ -## ---- echo=FALSE, message=FALSE----------------------------------------------- -knitr::opts_chunk$set(echo=TRUE, collapse=T, comment='#>') -library(MetAlyzer) -library(ggplot2) -library(dplyr) - -## ----set_data_path------------------------------------------------------------ -fpath <- system.file("extdata", "example_data.xlsx", package = "MetAlyzer") -mpath <- system.file("extdata", "example_meta_data.rds", package = "MetAlyzer") - -## ----initialize--------------------------------------------------------------- -obj <- MetAlyzerDataset(file_path = fpath) -show(obj) - -## ----getter------------------------------------------------------------------- -head(metaData(obj)) - -head(quantStatus(obj), c(5, 5)) - -## ----filter_metabolites------------------------------------------------------- -obj <- filterMetabolites(obj, class_name = "Metabolism Indicators") - -## ----filter_meta_data--------------------------------------------------------- -obj <- filterMetaData(obj, column = Group, keep = c(1:6)) - -## ----summarise_quant_data----------------------------------------------------- -summariseQuantData(obj) - -## ----read_meta_data----------------------------------------------------------- -meta_df <- readRDS(mpath) -head(meta_df) - -## ----update_meta_data--------------------------------------------------------- -obj <- updateMetaData(obj, name = Replicate, new_colum = meta_df$Replicate) -show(obj) - -## ----rename_meta_data--------------------------------------------------------- -obj <- renameMetaData(obj, Method = Group) -head(metaData(obj)) - -## ----plotting_data------------------------------------------------------------ -obj <- createPlottingData(obj, Method, Tissue, ungrouped = Replicate) -gg_df <- plottingData(obj) - -head(gg_df) - -## ---- glu_plot, fig.width=7, fig.height=4.5----------------------------------- -glu_gg_df <- filter(gg_df, Metabolite == "Glu") - -ggplot(glu_gg_df, aes(Method, Concentration, color = Status)) + - geom_point() + - scale_color_manual(values = c("Valid" = "#00CD66", - "LOQ" = "#87CEEB", - "LOD" = "#6A5ACD")) + - ylab("Concentration [pmol/mg Tissue]") + - facet_grid(~ Tissue) - -## ----before_imp--------------------------------------------------------------- -length(which(gg_df$Concentration == 0)) - -## ----imputation--------------------------------------------------------------- -obj <- imputePlottingData(obj, Tissue, Metabolite) -imp_gg_df <- plottingData(obj) - -## ----after_imp---------------------------------------------------------------- -min(imp_gg_df$imp_Conc, na.rm = TRUE) - -## ----transformation----------------------------------------------------------- -obj <- transformPlottingData(obj) -trans_gg_df <- plottingData(obj) -head(trans_gg_df) - -## ---- glu_plot_transformed, fig.width=7, fig.height=4.5----------------------- -trans_glu_gg_df <- filter(trans_gg_df, Metabolite == "Glu") - -ggplot(trans_glu_gg_df, aes(Method, transf_Conc, color = Status)) + - geom_point() + - scale_color_manual(values = c("Valid" = "#00CD66", - "LOQ" = "#87CEEB", - "LOD" = "#6A5ACD")) + - facet_grid(~ Tissue) - -## ---- ANOVA------------------------------------------------------------------- -obj <- performANOVA(obj, categorical = Method) -anv_gg_df <- plottingData(obj) -head(data.frame(anv_gg_df)) - -## ---- ANOVA_optimal----------------------------------------------------------- -anv_gg_df$optimal <- sapply(anv_gg_df$ANOVA_group, function(g) grepl("A", g)) -obj <- setPlottingData(obj, anv_gg_df) -head(data.frame(anv_gg_df)) - diff --git a/doc/MetAlyzer_User_Guide.Rmd b/doc/MetAlyzer_User_Guide.Rmd deleted file mode 100755 index b27dcf5..0000000 --- a/doc/MetAlyzer_User_Guide.Rmd +++ /dev/null @@ -1,177 +0,0 @@ ---- -title: "MetAlyzer User Guide" -author: "Nils Mechtel & Carolin Andresen" -date: "`r Sys.Date()`" -output: rmarkdown::html_vignette -Vignette: > - %\VignetteIndexEntry{Read and Analyze MetIDQ™ Software Output Files} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} - %\VignettePackage{MetAlyzer} ---- - -```{r, echo=FALSE, message=FALSE} -knitr::opts_chunk$set(echo=TRUE, collapse=T, comment='#>') -library(MetAlyzer) -library(ggplot2) -library(dplyr) -``` - -The package provides methods to read output files from the MetIDQ™ software into R. Metabolomics data is read and reformatted into an S4 object for convenient data handling, statistics and downstream analysis. - -## Install - -There is a version available on CRAN. - -```r -install.packages("MetAlyzer") -``` - -## Overview - -![](`r rprojroot::is_r_package$find_file('vignettes/MetAlyzer_overview.png')`){width=100%} - -The package takes metabolomic measurements as ".xlsx" files generated from the MetIDQ™ software. Additionally, meta data for each sample can be provided for further analysis. - -Effective quantification of metabolites requires a reliable signal. The limit of detection (LOD) is defined as 3 times signal-to-noise of the baseline, calculated by the software MetIDQ for each metabolite. Values are classified as "Valid", "LOQ" (below limit of quantification) or "LOD" (below limit of detection). This information is encoded in the color in the ".xlsx" table. Further color codes can include "ISTD Out of Range", "Invalid" and "Incomplete". The MetAlyzer packages allow to read both information, the value and the quantification status, gives statistics about the quantification efficacy and allows filtering based on the LOD. - -![Example of excel sheet.](`r rprojroot::is_r_package$find_file('vignettes/screenshot_xlsx.png')`){width=100%} - -## Get started -To present the functionalities of the package, we use a data set published by [Gegner et al. 2022](https://doi.org/10.1101/2021.12.16.472947). This data set used 6 (1 to 6) different extraction methods to quantify 630 metabolites in triplicates for 4 tissue types (Drosophila, Mouse Liver, Mouse Kidney, Zebrafish Liver). - -### Set data paths to the example data and a meta data file: -```{r set_data_path} -fpath <- system.file("extdata", "example_data.xlsx", package = "MetAlyzer") -mpath <- system.file("extdata", "example_meta_data.rds", package = "MetAlyzer") -``` - -### Initialize MetAlyzer object and load data: -```{r initialize} -obj <- MetAlyzerDataset(file_path = fpath) -show(obj) -``` -The overview shows the different slots in the MetAlyzer object. The 630 metabolites are accompanied by 232 metabolite indicators calculated by the MetIDQ™ software. - -The slots can be accessed using the functions *metaData*, *rawData*, *quantStatus* and *metabolites*. - -**For example:** -```{r getter} -head(metaData(obj)) - -head(quantStatus(obj), c(5, 5)) -``` - -### Filter metabolites and meta data -To subset the data set, the functions *filterMetabolites* and *filterMetaData* can be used to filter for certain metabolite classes or meta data. The original data are kept, so the filtering can be reset to the original data set using *resetMetabolites* and *resetMetaData*. - -**For example, we want to exclude the metabolite indicators:** -```{r filter_metabolites} -obj <- filterMetabolites(obj, class_name = "Metabolism Indicators") -``` - -**There are also 2 blank samples in the data that we want to remove:** -The extraction methods are encoded in the column "Group" of the meta data. We only keep those samples that are named c(1:6). -```{r filter_meta_data} -obj <- filterMetaData(obj, column = Group, keep = c(1:6)) -``` - -### Show statistics -The function *summariseQuantData* shows a summary of the quantification statistics of the data set. E.g. 45.36% of the metabolites across the data set were below the LOD. -```{r summarise_quant_data} -summariseQuantData(obj) -``` -### Add meta data -As the different extraction methods were measured in triplicates, we want to add this meta data using the function *updateMetaData*. -First we load the meta data. -```{r read_meta_data} -meta_df <- readRDS(mpath) -head(meta_df) -``` - -And then add the replicate information. -```{r update_meta_data} -obj <- updateMetaData(obj, name = Replicate, new_colum = meta_df$Replicate) -show(obj) -``` - -### Rename meta data -In the meta data, the description of the 6 extraction methods is encoded in the column "Group". For downstream analyses, we want to rename this column into "Method" using *renameMetaData*. -```{r rename_meta_data} -obj <- renameMetaData(obj, Method = Group) -head(metaData(obj)) -``` - -### Reformat for plotting -For further filtering and plotting, the data can be reformatted into a data frame. This allows easy plotting with e.g. ggplot2 and filtering by e.g. the metabolites of interest or the quantification status. If replicates are available, some statistics are automatically calculated for each metabolite and each additional grouping variable that is passed. For our study, we can add "Method" and "Tissue" to indicate that there are replicates for these variables. In addition, it is possible to pass another column from the meta data that will be added but not used as a grouping variable for the statistics. -```{r plotting_data} -obj <- createPlottingData(obj, Method, Tissue, ungrouped = Replicate) -gg_df <- plottingData(obj) - -head(gg_df) -``` - -For example, we can filter for glutamic acid (Glu) and plot the concentration. -```{r, glu_plot, fig.width=7, fig.height=4.5} -glu_gg_df <- filter(gg_df, Metabolite == "Glu") - -ggplot(glu_gg_df, aes(Method, Concentration, color = Status)) + - geom_point() + - scale_color_manual(values = c("Valid" = "#00CD66", - "LOQ" = "#87CEEB", - "LOD" = "#6A5ACD")) + - ylab("Concentration [pmol/mg Tissue]") + - facet_grid(~ Tissue) -``` - -### Imputation and transformation -In case NAs or zeros are produced, we provide zero imputation as described in ([Gegner et al.](https://doi.org/10.1101/2021.12.16.472947)). Before, the imputation, there are 12,212 zero values in the data set. -```{r before_imp} -length(which(gg_df$Concentration == 0)) -``` - -Now we impute the data for each tissue and metabolite separately. -```{r imputation} -obj <- imputePlottingData(obj, Tissue, Metabolite) -imp_gg_df <- plottingData(obj) -``` - -After the imputation, the lowest values is non-zero. The imputed values are written into column "imp_Conc". NAs are not imputed by default. -```{r after_imp} -min(imp_gg_df$imp_Conc, na.rm = TRUE) -``` - -Raw or imputed concentrations in the object can directly be transformed using the function *transformPlottingData*. By default the data is log2 transformed and written into the column "transf_Conc". -```{r transformation} -obj <- transformPlottingData(obj) -trans_gg_df <- plottingData(obj) -head(trans_gg_df) -``` - -To plot imputed and transformed data, we filter again for glutamic acid: -```{r, glu_plot_transformed, fig.width=7, fig.height=4.5} -trans_glu_gg_df <- filter(trans_gg_df, Metabolite == "Glu") - -ggplot(trans_glu_gg_df, aes(Method, transf_Conc, color = Status)) + - geom_point() + - scale_color_manual(values = c("Valid" = "#00CD66", - "LOQ" = "#87CEEB", - "LOD" = "#6A5ACD")) + - facet_grid(~ Tissue) -``` - -### ANOVA -As described in ([Gegner et al.](https://doi.org/10.1101/2021.12.16.472947)), an ANOVA can be used to identify metabolites which are significantly higher in one or more methods compared to all other for each metabolite. -```{r, ANOVA} -obj <- performANOVA(obj, categorical = Method) -anv_gg_df <- plottingData(obj) -head(data.frame(anv_gg_df)) -``` - -Any method labeled with an "A" can now be considered optimal among all tested methods. - -```{r, ANOVA_optimal} -anv_gg_df$optimal <- sapply(anv_gg_df$ANOVA_group, function(g) grepl("A", g)) -obj <- setPlottingData(obj, anv_gg_df) -head(data.frame(anv_gg_df)) -``` diff --git a/doc/MetAlyzer_User_Guide.html b/doc/MetAlyzer_User_Guide.html deleted file mode 100644 index 7a911b6..0000000 --- a/doc/MetAlyzer_User_Guide.html +++ /dev/null @@ -1,418 +0,0 @@ - - - - - - - - - - - - - - - - -MetAlyzer User Guide - - - - - - - - - - - - - - - - - - - - - - - - - -

MetAlyzer User Guide

-

Nils Mechtel & Carolin Andresen

-

2022-01-30

- - - -

The package provides methods to read output files from the MetIDQ™ software into R. Metabolomics data is read and reformatted into an S4 object for convenient data handling, statistics and downstream analysis.

-
-

Install

-

There is a version available on CRAN.

-
install.packages("MetAlyzer")
-
-
-

Overview

-

-

The package takes metabolomic measurements as “.xlsx” files generated from the MetIDQ™ software. Additionally, meta data for each sample can be provided for further analysis.

-

Effective quantification of metabolites requires a reliable signal. The limit of detection (LOD) is defined as 3 times signal-to-noise of the baseline, calculated by the software MetIDQ for each metabolite. Values are classified as “Valid”, “LOQ” (below limit of quantification) or “LOD” (below limit of detection). This information is encoded in the color in the “.xlsx” table. Further color codes can include “ISTD Out of Range”, “Invalid” and “Incomplete”. The MetAlyzer packages allow to read both information, the value and the quantification status, gives statistics about the quantification efficacy and allows filtering based on the LOD.

-
- -

Example of excel sheet.

-
-
-
-

Get started

-

To present the functionalities of the package, we use a data set published by Gegner et al. 2022. This data set used 6 (1 to 6) different extraction methods to quantify 630 metabolites in triplicates for 4 tissue types (Drosophila, Mouse Liver, Mouse Kidney, Zebrafish Liver).

-
-

Set data paths to the example data and a meta data file:

-
fpath <- system.file("extdata", "example_data.xlsx", package = "MetAlyzer")
-mpath <- system.file("extdata", "example_meta_data.rds", package = "MetAlyzer")
-
-
-

Initialize MetAlyzer object and load data:

-
obj <- MetAlyzerDataset(file_path = fpath)
-show(obj)
-#> -------------------------------------
-#> File name: example_data.xlsx 
-#> Sheet: 1 
-#> File path: /tmp/RtmpmZXvCS/temp_libpathab1375f5ae21/MetAlyzer/extdata 
-#> Metabolites: 862 
-#> Classes: 24 
-#> Including metabolism indicators: TRUE 
-#> Number of samples: 74 
-#> Columns meta data: "Plate Bar Code"; "Sample Bar Code"; "Sample Type"; "Group"; "Tissue"; "Sample Volume"; "Measurement Time"
-#> Ploting data created: FALSE
-

The overview shows the different slots in the MetAlyzer object. The 630 metabolites are accompanied by 232 metabolite indicators calculated by the MetIDQ™ software.

-

The slots can be accessed using the functions metaData, rawData, quantStatus and metabolites.

-

For example:

-
head(metaData(obj))
-#>                 Plate Bar Code Sample Bar Code Sample Type Group     Tissue
-#> 9  1030423581-1 | 1030425491-1      1030420836      Sample     1 Drosophila
-#> 10 1030423581-1 | 1030425491-1      1030420841      Sample     1 Drosophila
-#> 11 1030423581-1 | 1030425491-1      1030420855      Sample     1 Drosophila
-#> 12 1030423581-1 | 1030425491-1      1030423460      Sample     2 Drosophila
-#> 13 1030423581-1 | 1030425491-1      1030420860      Sample     2 Drosophila
-#> 14 1030423581-1 | 1030425491-1      1030420874      Sample     2 Drosophila
-#>    Sample Volume                          Measurement Time
-#> 9             10 2020.10.05 17:52:54 | 2020.10.06 16:27:10
-#> 10            10 2020.10.05 17:59:08 | 2020.10.06 16:30:42
-#> 11            10 2020.10.05 18:05:19 | 2020.10.06 21:53:52
-#> 12            10 2020.10.05 18:11:33 | 2020.10.06 16:37:44
-#> 13            10 2020.10.05 18:17:47 | 2020.10.06 22:00:56
-#> 14            10 2020.10.05 18:23:58 | 2020.10.06 22:04:29
-
-head(quantStatus(obj), c(5, 5))
-#>       C0    C2    C3 C3-DC (C4-OH) C3-OH
-#> 9  Valid Valid Valid           LOD   LOD
-#> 10 Valid Valid Valid           LOD   LOD
-#> 11 Valid Valid Valid           LOD   LOD
-#> 12 Valid Valid Valid         Valid   LOD
-#> 13 Valid Valid Valid           LOD   LOD
-
-
-

Filter metabolites and meta data

-

To subset the data set, the functions filterMetabolites and filterMetaData can be used to filter for certain metabolite classes or meta data. The original data are kept, so the filtering can be reset to the original data set using resetMetabolites and resetMetaData.

-

For example, we want to exclude the metabolite indicators:

-
obj <- filterMetabolites(obj, class_name = "Metabolism Indicators")
-#> 232 metabolites were filtered!
-

There are also 2 blank samples in the data that we want to remove: The extraction methods are encoded in the column “Group” of the meta data. We only keep those samples that are named c(1:6).

-
obj <- filterMetaData(obj, column = Group, keep = c(1:6))
-
-
-

Show statistics

-

The function summariseQuantData shows a summary of the quantification statistics of the data set. E.g. 45.36% of the metabolites across the data set were below the LOD.

-
summariseQuantData(obj)
-#> -------------------------------------
-#> Valid: 21951 (48.39%)
-#> LOQ: 2832 (6.24%)
-#> LOD: 20577 (45.36%)
-#> NAs: 0 (0%)
-
-
-

Add meta data

-

As the different extraction methods were measured in triplicates, we want to add this meta data using the function updateMetaData. First we load the meta data.

-
meta_df <- readRDS(mpath)
-head(meta_df)
-#>   Method Replicate
-#> 1      1        R1
-#> 2      1        R2
-#> 3      1        R3
-#> 4      2        R1
-#> 5      2        R2
-#> 6      2        R3
-

And then add the replicate information.

-
obj <- updateMetaData(obj, name = Replicate, new_colum = meta_df$Replicate)
-show(obj)
-#> -------------------------------------
-#> File name: example_data.xlsx 
-#> Sheet: 1 
-#> File path: /tmp/RtmpmZXvCS/temp_libpathab1375f5ae21/MetAlyzer/extdata 
-#> Metabolites: 630 
-#> Classes: 23 
-#> Including metabolism indicators: FALSE 
-#> Number of samples: 72 
-#> Columns meta data: "Plate Bar Code"; "Sample Bar Code"; "Sample Type"; "Group"; "Tissue"; "Sample Volume"; "Measurement Time"; "Replicate"
-#> Ploting data created: FALSE
-
-
-

Rename meta data

-

In the meta data, the description of the 6 extraction methods is encoded in the column “Group”. For downstream analyses, we want to rename this column into “Method” using renameMetaData.

-
obj <- renameMetaData(obj, Method = Group)
-head(metaData(obj))
-#>                 Plate Bar Code Sample Bar Code Sample Type Method     Tissue
-#> 9  1030423581-1 | 1030425491-1      1030420836      Sample      1 Drosophila
-#> 10 1030423581-1 | 1030425491-1      1030420841      Sample      1 Drosophila
-#> 11 1030423581-1 | 1030425491-1      1030420855      Sample      1 Drosophila
-#> 12 1030423581-1 | 1030425491-1      1030423460      Sample      2 Drosophila
-#> 13 1030423581-1 | 1030425491-1      1030420860      Sample      2 Drosophila
-#> 14 1030423581-1 | 1030425491-1      1030420874      Sample      2 Drosophila
-#>    Sample Volume                          Measurement Time Replicate
-#> 9             10 2020.10.05 17:52:54 | 2020.10.06 16:27:10        R1
-#> 10            10 2020.10.05 17:59:08 | 2020.10.06 16:30:42        R2
-#> 11            10 2020.10.05 18:05:19 | 2020.10.06 21:53:52        R3
-#> 12            10 2020.10.05 18:11:33 | 2020.10.06 16:37:44        R1
-#> 13            10 2020.10.05 18:17:47 | 2020.10.06 22:00:56        R2
-#> 14            10 2020.10.05 18:23:58 | 2020.10.06 22:04:29        R3
-
-
-

Reformat for plotting

-

For further filtering and plotting, the data can be reformatted into a data frame. This allows easy plotting with e.g. ggplot2 and filtering by e.g. the metabolites of interest or the quantification status. If replicates are available, some statistics are automatically calculated for each metabolite and each additional grouping variable that is passed. For our study, we can add “Method” and “Tissue” to indicate that there are replicates for these variables. In addition, it is possible to pass another column from the meta data that will be added but not used as a grouping variable for the statistics.

-
obj <- createPlottingData(obj, Method, Tissue, ungrouped = Replicate)
-gg_df <- plottingData(obj)
-
-head(gg_df)
-#> # A tibble: 6 × 12
-#> # Groups:   Method, Tissue, Metabolite [2]
-#>   Method Tissue     Replicate Metabolite Class   Concentration  Mean    SD    CV
-#>   <fct>  <fct>      <fct>     <fct>      <fct>           <dbl> <dbl> <dbl> <dbl>
-#> 1 1      Drosophila R1        C0         Acylca…         203   179.  82.4  0.461
-#> 2 1      Drosophila R2        C0         Acylca…          86.8 179.  82.4  0.461
-#> 3 1      Drosophila R3        C0         Acylca…         246   179.  82.4  0.461
-#> 4 1      Drosophila R1        C2         Acylca…          29.5  26.6  9.72 0.365
-#> 5 1      Drosophila R2        C2         Acylca…          15.8  26.6  9.72 0.365
-#> 6 1      Drosophila R3        C2         Acylca…          34.6  26.6  9.72 0.365
-#> # … with 3 more variables: CV_thresh <fct>, Status <fct>,
-#> #   valid_replicates <lgl>
-

For example, we can filter for glutamic acid (Glu) and plot the concentration.

-
glu_gg_df <- filter(gg_df, Metabolite == "Glu")
-
-ggplot(glu_gg_df, aes(Method, Concentration, color = Status)) +
-  geom_point() +
-  scale_color_manual(values = c("Valid" = "#00CD66",
-                                "LOQ" = "#87CEEB",
-                                "LOD" = "#6A5ACD")) + 
-  ylab("Concentration [pmol/mg Tissue]") + 
-  facet_grid(~ Tissue)
-

-
-
-

Imputation and transformation

-

In case NAs or zeros are produced, we provide zero imputation as described in (Gegner et al.). Before, the imputation, there are 12,212 zero values in the data set.

-
length(which(gg_df$Concentration == 0))
-#> [1] 12212
-

Now we impute the data for each tissue and metabolite separately.

-
obj <- imputePlottingData(obj, Tissue, Metabolite)
-imp_gg_df <- plottingData(obj)
-

After the imputation, the lowest values is non-zero. The imputed values are written into column “imp_Conc”. NAs are not imputed by default.

-
min(imp_gg_df$imp_Conc, na.rm = TRUE)
-#> [1] 2e-04
-

Raw or imputed concentrations in the object can directly be transformed using the function transformPlottingData. By default the data is log2 transformed and written into the column “transf_Conc”.

-
obj <- transformPlottingData(obj)
-trans_gg_df <- plottingData(obj)
-head(trans_gg_df)
-#> # A tibble: 6 × 14
-#> # Groups:   Method, Tissue, Metabolite [2]
-#>   Method Tissue     Replicate Metabolite Class Concentration imp_Conc transf_Conc
-#>   <fct>  <fct>      <fct>     <fct>      <fct>         <dbl>    <dbl>       <dbl>
-#> 1 1      Drosophila R1        C0         Acyl…         203      203          7.67
-#> 2 1      Drosophila R2        C0         Acyl…          86.8     86.8        6.44
-#> 3 1      Drosophila R3        C0         Acyl…         246      246          7.94
-#> 4 1      Drosophila R1        C2         Acyl…          29.5     29.5        4.88
-#> 5 1      Drosophila R2        C2         Acyl…          15.8     15.8        3.98
-#> 6 1      Drosophila R3        C2         Acyl…          34.6     34.6        5.11
-#> # … with 6 more variables: Mean <dbl>, SD <dbl>, CV <dbl>, CV_thresh <fct>,
-#> #   Status <fct>, valid_replicates <lgl>
-

To plot imputed and transformed data, we filter again for glutamic acid:

-
trans_glu_gg_df <- filter(trans_gg_df, Metabolite == "Glu")
-
-ggplot(trans_glu_gg_df, aes(Method, transf_Conc, color = Status)) +
-  geom_point() +
-  scale_color_manual(values = c("Valid" = "#00CD66",
-                                "LOQ" = "#87CEEB",
-                                "LOD" = "#6A5ACD")) +
-  facet_grid(~ Tissue)
-

-
-
-

ANOVA

-

As described in (Gegner et al.), an ANOVA can be used to identify metabolites which are significantly higher in one or more methods compared to all other for each metabolite.

-
obj <- performANOVA(obj, categorical = Method)
-anv_gg_df <- plottingData(obj)
-head(data.frame(anv_gg_df))
-#>   Method     Tissue Replicate Metabolite          Class Concentration imp_Conc
-#> 1      1 Drosophila        R1         C0 Acylcarnitines         203.0    203.0
-#> 2      1 Drosophila        R2         C0 Acylcarnitines          86.8     86.8
-#> 3      1 Drosophila        R3         C0 Acylcarnitines         246.0    246.0
-#> 4      1 Drosophila        R1         C2 Acylcarnitines          29.5     29.5
-#> 5      1 Drosophila        R2         C2 Acylcarnitines          15.8     15.8
-#> 6      1 Drosophila        R3         C2 Acylcarnitines          34.6     34.6
-#>   transf_Conc      Mean        SD        CV CV_thresh Status valid_replicates
-#> 1    7.665336 178.60000 82.357028 0.4611256    more30  Valid             TRUE
-#> 2    6.439623 178.60000 82.357028 0.4611256    more30  Valid             TRUE
-#> 3    7.942515 178.60000 82.357028 0.4611256    more30  Valid             TRUE
-#> 4    4.882643  26.63333  9.722311 0.3650430    more30  Valid             TRUE
-#> 5    3.981853  26.63333  9.722311 0.3650430    more30  Valid             TRUE
-#> 6    5.112700  26.63333  9.722311 0.3650430    more30  Valid             TRUE
-#>   ANOVA_group
-#> 1           A
-#> 2           A
-#> 3           A
-#> 4          AB
-#> 5          AB
-#> 6          AB
-

Any method labeled with an “A” can now be considered optimal among all tested methods.

-
anv_gg_df$optimal <- sapply(anv_gg_df$ANOVA_group, function(g) grepl("A", g))
-obj <- setPlottingData(obj, anv_gg_df)
-head(data.frame(anv_gg_df))
-#>   Method     Tissue Replicate Metabolite          Class Concentration imp_Conc
-#> 1      1 Drosophila        R1         C0 Acylcarnitines         203.0    203.0
-#> 2      1 Drosophila        R2         C0 Acylcarnitines          86.8     86.8
-#> 3      1 Drosophila        R3         C0 Acylcarnitines         246.0    246.0
-#> 4      1 Drosophila        R1         C2 Acylcarnitines          29.5     29.5
-#> 5      1 Drosophila        R2         C2 Acylcarnitines          15.8     15.8
-#> 6      1 Drosophila        R3         C2 Acylcarnitines          34.6     34.6
-#>   transf_Conc      Mean        SD        CV CV_thresh Status valid_replicates
-#> 1    7.665336 178.60000 82.357028 0.4611256    more30  Valid             TRUE
-#> 2    6.439623 178.60000 82.357028 0.4611256    more30  Valid             TRUE
-#> 3    7.942515 178.60000 82.357028 0.4611256    more30  Valid             TRUE
-#> 4    4.882643  26.63333  9.722311 0.3650430    more30  Valid             TRUE
-#> 5    3.981853  26.63333  9.722311 0.3650430    more30  Valid             TRUE
-#> 6    5.112700  26.63333  9.722311 0.3650430    more30  Valid             TRUE
-#>   ANOVA_group optimal
-#> 1           A    TRUE
-#> 2           A    TRUE
-#> 3           A    TRUE
-#> 4          AB    TRUE
-#> 5          AB    TRUE
-#> 6          AB    TRUE
-
-
- - - - - - - - - - -