-
Notifications
You must be signed in to change notification settings - Fork 0
/
microbiome_heatmaps.R
133 lines (99 loc) · 4.63 KB
/
microbiome_heatmaps.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# ----------------------------------------------------------------------
# FLS R group 2015-06-09: Basic heatmaps of micriobiome data
# ----------------------------------------------------------------------
# ----------------------------------------------------------------------
# INTRO
# ----------------------------------------------------------------------
# Such choice, many defaults (wow)
# - heatmap()
# - heatmap.2()
# - aheatmap()
# - heatplot()
# - ggplot2's geom_tile()
# These are mostly pretty similar, but defaults vary a lot
# OTUs for fecal 16S sequences from mothers and their monozygotic twins. Reyes et al. (2013)
# Were the abundance values not already proportional, we could do:
# reyes_prop <- reyes/rowSums(reyes)
# Required packages: vegan, RColorBrewer, gplots, ggplot2, reshape, stringr
reyes <- read.csv('16s_taxa_all.rdp_9.scaled.tsv', sep='\t')
# reyes <- read.csv('virome_taxa_all.lambda.scaled.tsv', sep='\t')
rownames(reyes) <- reyes$TAXON
reyes <- reyes[, -1]
reyes <- as.matrix(reyes)
# ----------------------------------------------------------------------
# HEATMAP()
# ----------------------------------------------------------------------
# Minimum viable heatmap with woeful defaults
heatmap(reyes)
heatmap(reyes, Colv=NA, Rowv=NA)
# Some colour
library(RColorBrewer)
jBuPuFun <- colorRampPalette(brewer.pal(n = 9, "BuPu"))
paletteSize <- 256
jBuPuPalette <- jBuPuFun(paletteSize)
# Discard taxa to improve readability?
# ...High pass filter? (discard rows with low row-wise maxima)
taxon_max_abundances <- apply(reyes, 1, max)
reyes_hpf <- reyes[which(taxon_max_abundances > 0.001),]
heatmap(reyes_hpf, Colv=NA, Rowv=NA, col=jBuPuPalette)
# low_abundance_taxa = names(which(max_taxon_abundances < 0.01))
# reyes_cull <- reyes[, -which(names(reyes) %in% low_abundance_taxa)]
# ...Or alternatively discard rows with low variance (probably uninteresting)
taxon_variances <- apply(reyes, 1, var)
reyes_most_variable <- reyes[taxon_variances > quantile(taxon_variances, 0.8),]
heatmap(reyes_most_variable, Colv=NA, Rowv=NA, col=jBuPuPalette)
# Now, but using an appropriate distance measure for clustering taxa
# Vegan library Provides Bray-Curtis dissimilarity measure
library(vegan)
bray_dist_mat <- vegdist(reyes_most_variable, method='bray')
bray_clusters <- hclust(bray_dist_mat, 'aver')
heatmap(as.matrix(reyes_most_variable), Rowv=as.dendrogram(bray_clusters), Colv=NA, col=jBuPuPalette)
# Clustering rows AND columns too
bray_dist_mat_t <- vegdist(t(reyes_most_variable), method='bray')
bray_clusters_t <- hclust(bray_dist_mat_t, 'aver')
heatmap(as.matrix(reyes_most_variable),
Rowv=as.dendrogram(bray_clusters),
Colv=as.dendrogram(bray_clusters_t),
col=jBuPuPalette)
# ----------------------------------------------------------------------
# HEATMAP.2()
# ----------------------------------------------------------------------
# gplots' heatmap.2() is more commonly used (AFAIAA)
# Can indicate experimental groups / covariates
# Histograms
# Assign each row to one of two made-up categories
example_categories <- round(runif(n = 55, min = 1, max = 2)) # 1 or 2
example_categories <- replace(example_categories, which(example_categories == 1), "lightgreen")
example_categories <- replace(example_categories, which(example_categories == 2), "darkgreen")
cbind(row.names(reyes_most_variable), example_categories)
# Plot
library(gplots)
heatmap.2(as.matrix(reyes_most_variable),
Rowv=as.dendrogram(bray_clusters),
Colv=as.dendrogram(bray_clusters_t),
RowSideColors=example_categories,
trace='none',
col=jBuPuPalette)
# ----------------------------------------------------------------------
# GGPLOT2()
# ----------------------------------------------------------------------
# Basic heatmap using geom_tile()
library(ggplot2)
library(reshape)
# library(stringr)
# Wrangling... ggplot likes melted dataframes
reyes_most_variable_m <- melt(as.matrix(reyes_most_variable))
# reyes_most_variable_m$timepoint <- str_sub(reyes_most_variable_m$X2,-1,-1)
# reyes_most_variable_m$individual <- str_sub(reyes_most_variable_m$X2,1,-3)
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")), space="Lab")
gg <- (ggplot(reyes_most_variable_m,
aes(x = X2, y = X1, fill = value))
+ geom_tile()
+ scale_fill_gradientn(colours = myPalette(100))
+ scale_x_discrete(expand = c(0, 0))
+ scale_y_discrete(expand = c(0, 0))
+ coord_equal()
+ theme_bw())
print(gg)
# Mention phyloseq, heatplus
# Credit to http://www.molecularecologist.com/2013/08/making-heatmaps-with-r-for-microbiome-analysis/