Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AJG.Code review #11

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
141 changes: 141 additions & 0 deletions vignettes/TET2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
## ----message=FALSE, loadpkgs, eval=FALSE-----------------------------------------------------------------
#> install.packages("remotes")
#> install.packages("BiocManager")
#> library(BiocManager)
#> if (!require("GEOquery")) {
#> BiocManager::install("GEOquery")
#> library(GEOquery)
#> }
#> if(!require("limma")) {
#> BiocManager::install("limma")
#> library(limma)
#> }
#> #Kate told me I needed this and then I started Rstudio again and now it
#> #seems to work.
#> BiocManager::install("VanAndelInstitute/WorldsSimplestCodeReview")
#> library(tidyverse)
#> library(knitr)
#> knitr::opts_chunk$set(echo = TRUE)
#> knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
#> if (!requireNamespace("BiocManager", quietly = TRUE))
#> install.packages("BiocManager")
#>
#> library(devtools)
#> load_all("./")


## ---- tangle, eval = FALSE, message = FALSE, echo = TRUE-------------------------------------------------
#> knitr::knit("TET2.Rmd", tangle = TRUE)
#> [1] "TET2.R"


## ---- fetchGEO-------------------------------------------------------------------------------------------

library(limma)
library(GEOquery)
if (!exists("DNAme")) data(DNAme)

if (FALSE) { # this takes about 5 minutes:

# needed to fetch data
library(GEOquery)
MSK_HOVON <- getGEO("GSE24505")

# skip the expression data:
platform <- sapply(MSK_HOVON, annotation)
methylation <- which(platform == "GPL6604")
DNAme <- MSK_HOVON[[methylation]] # GPL6604, HG17_HELP_PROMOTER
DNAme$male <-ifelse(DNAme$characteristics_ch1=="sex (male.1_female.2): 1",1,0)
DNAme$TET2 <- ifelse(DNAme$characteristics_ch1.7 == "tet2: WT", 0, 1)
DNAme$IDH <- ifelse(DNAme$characteristics_ch1.8 == "idh1.idh2: WT", 0, 1)
DNAme$purity <- as.integer(DNAme$"bm_%blasts:ch1") / 100
save(DNAme, file="../data/DNAme.rda")

}

# how many probes, how many patients?
dim(DNAme)
# Features Samples
# 25626 394



## ---- heatmap, eval=TRUE---------------------------------------------------------------------------------

# always plot your data
library(ComplexHeatmap)
mutations <- t(as.matrix(pData(DNAme)[, c("TET2", "IDH")]))
Heatmap(mutations, col=c("lightgray","darkred"), name="mutant", column_km=4,
column_names_gp = gpar(fontsize = 7))



## ---- The OddBall----------------------------------------------------------------------------------------
library(tidyverse)
# one patient is the odd-ball here
as_tibble(DNAme$`idh1.idh2:ch1`) -> idh1_idh2
# since there is ch1 and 2, I compared both and they have the exact same information
# as_tibble(DNAme$`idh1.idh2:ch2`) -> idh1_idh2_next
# idh1_idh2 == idh1_idh2_next - returns TRUE
as_tibble(DNAme$`tet2:ch1`) -> tet
# as_tibble(DNAme$`tet2:ch2`) -> tet_2
# tet == tet_2 - returns TRUE
colnames(tet) <- c("TET")
colnames(idh1_idh2) <- c("IDH")
compiled <- cbind(tet, idh1_idh2)
View(compiled) # scrolled through and identified the patient/sample number that had the mutations in both TET2 and IDH

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is great! Wish I could have done this!


## ---- TET2_vs_IDH----------------------------------------------------------------------------------------

# model TET2 and IDH1/2 mutant related hypermethylation
# note: there are plenty of confounders (pb%, bm%, wbc) that could be included
library(limma)

# simplest design
design1 <- with(pData(DNAme), model.matrix( ~ IDH + TET2 ))
fit1 <- eBayes(lmFit(exprs(DNAme), design1))
(IDH_diffmeth_probes_fit1 <- nrow(topTable(fit1,
coef=grep("IDH", colnames(design1)),
p.value=0.05, # change if you like
number=Inf)))
# 6513 probes for IDH

(TET_diffmeth_probes_fit1 <- nrow(topTable(fit1,
coef=grep("TET2", colnames(design1)),
p.value=0.05, # change if you like
number=Inf)))
# 6 probes for TET2

# control for sex
design2 <- with(pData(DNAme), model.matrix( ~ IDH + TET2 + male ))
fit2 <- eBayes(lmFit(exprs(DNAme), design2))
(IDH_diffmeth_probes_fit2 <- nrow(topTable(fit2,
coef=grep("IDH", colnames(design2)),
p.value=0.05, # change if you like
number=Inf)))
# 6651 probes for IDH

(TET2_diffmeth_probes_fit2 <- nrow(topTable(fit2,
coef=grep("TET", colnames(design2)),
p.value=0.05, # change if you like
number=Inf)))
# 7 probes for TET2

# control for blast count
design3 <- with(pData(DNAme), model.matrix( ~ IDH:purity + TET2:purity))
fit3 <- eBayes(lmFit(exprs(DNAme)[, as.integer(rownames(design3))], design3))

(IDH_diffmeth_probes_fit3 <- nrow(topTable(fit3,
coef=grep("IDH", colnames(design3)),
p.value=0.05, # change if you like
number=Inf)))
# 7450 probes for IDH:purity

(TET2_diffmeth_probes_fit3 <- nrow(topTable(fit3,
coef=grep("TET", colnames(design3)),
p.value=0.05, # change if you like
number=Inf)))
# 10 probes for TET2:purity


78 changes: 61 additions & 17 deletions vignettes/TET2.Rmd
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
title: "Code review: TET2 and hypermethylation"
author: "Tim Triche"
date: "November 22nd, 2021"
author: "Tim Triche.AJG"
date: "November 29th, 2021"
output:
html_document:
keep_md: true
Expand All @@ -11,29 +11,46 @@ vignette: >
\usepackage[utf8]{inputenc}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(devtools)
load_all("./")
```

# I trust that I need this but could not build this on my own
# Already eviewed by Svetlana Djirackor (THANKS)

# Installation

Install the WorldsSimplestCodeReview package, if you haven't.

```{r, loadpkgs, eval = FALSE, message = FALSE}
#install.packages("remotes")
#install.packages("BiocManager")
#BiocManager::install("VanAndelInstitute/WorldsSimplestCodeReview")
```{r message=FALSE, loadpkgs, eval=FALSE}
install.packages("remotes")
install.packages("BiocManager")
library(BiocManager)
if (!require("GEOquery")) {
BiocManager::install("GEOquery")
library(GEOquery)
}
if(!require("limma")) {
BiocManager::install("limma")
library(limma)
}
#Kate told me I needed this and then I started Rstudio again and now it
#seems to work.
BiocManager::install("VanAndelInstitute/WorldsSimplestCodeReview")
library(tidyverse)
library(knitr)
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")

library(devtools)
load_all("./")
```

To extract just the R code, you can use knitr::knit(input, tangle=TRUE):

```{r, tangle, eval = FALSE, message = FALSE, echo = FALSE}
# knitr::knit("TET2.Rmd", tangle = TRUE)
# [1] "TET2.R"
```{r, tangle, eval = FALSE, message = FALSE, echo = TRUE}
knitr::knit("TET2.Rmd", tangle = TRUE)
"TET2.R"
#when I run this chunk nothing appears to happen.
```

# Introduction
Expand Down Expand Up @@ -77,25 +94,46 @@ if (FALSE) { # this takes about 5 minutes:

# how many probes, how many patients?
dim(DNAme)
# Features Samples
# 25626 394
#I get the same answer as what the vignette has
#It made something. A large data set of a format that I don't understand
view(DNAme)

```

### Some contrasts

Is it the case that TET2, IDH1, and IDH2 mutations are exclusive?
_With the exception of GSM604380/patient 316, TET2 and IDH1/2 mutations are exclusive._

```{r, heatmap, eval=TRUE}

# always plot your data
install.packages("nat")
install.packages("ComplexHeatmap")
library(ComplexHeatmap)
mutations <- t(as.matrix(pData(DNAme)[, c("TET2", "IDH")]))
Heatmap(mutations, col=c("lightgray","darkred"), name="mutant", column_km=4,
column_names_gp = gpar(fontsize = 7))

```

### Healthy curiosity
```{r, The OddBall}
library(tidyverse)
# one patient is the odd-ball here
as_tibble(DNAme$`idh1.idh2:ch1`) -> idh1_idh2
# since there is ch1 and 2, I compared both and they have the exact same information
# as_tibble(DNAme$`idh1.idh2:ch2`) -> idh1_idh2_next
# idh1_idh2 == idh1_idh2_next - returns TRUE
as_tibble(DNAme$`tet2:ch1`) -> tet
# as_tibble(DNAme$`tet2:ch2`) -> tet_2
# tet == tet_2 - returns TRUE
colnames(tet) <- c("TET")
colnames(idh1_idh2) <- c("IDH")
compiled <- cbind(tet, idh1_idh2)
View(compiled) # scrolled through and identified the patient/sample number that had the mutations in both TET2 and IDH
```

Do we see genome-wide hypermethylation from TET2 mutations?

```{r, TET2_vs_IDH}
Expand Down Expand Up @@ -151,3 +189,9 @@ fit3 <- eBayes(lmFit(exprs(DNAme)[, as.integer(rownames(design3))], design3))
# 10 probes for TET2:purity

```

I'm unsure of how to interpret the code above:
- The annotation/description of the designs are unclear.
- Why would we run the code this way?
- What do the probe numbers mean?
- How can this be translated into assessing genome-wide methylation?