-
Notifications
You must be signed in to change notification settings - Fork 5
/
notes_plots_largerclades.R
114 lines (80 loc) · 4.29 KB
/
notes_plots_largerclades.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
library(seqinr)
source("getClades_size.R")
source("plotClades.R")
source("larger_scale_features.R")
timeinfo = read.csv("Data/H3N2_labels_clades_dates.csv", header = F); head(timeinfo)
aln=read.fasta("Data/H3N2.fa")
gdh3n2 = read.tree("rootedRAxML_H3N2.tree")
#############################################
# notes and testing my larger-scale features
#############################################
flu=read.tree("timetreeH3N2copy.nwk")
gdh3n2=read.tree("rootedRAxML_H3N2.tree")
clades2019= pickClades(flu, minSize = 300, maxSize = 500)
plotClades(flu, clades2019, show.tip.label = F, trysize = "big")
# my functions are timechop (sets time bins), divtt, ltt, gendistprofiles (diversity through time),
# lttgendist, and divttgendist
myclade = extract.clade(flu, node = clades2019$nodes[6])
plot(myclade,show.tip.label=F)
## compute things
# things that are NOT cumulative
lttinfo=as.data.frame(ltt.plot.coords(myclade,backward = F))
glttd=lttgendist(timetree = myclade, gdtree = gdh3n2) # ltt through gen dist
# a couple functions make things both cum and non-cum
gg=gendistprofiles(myclade, timechop(myclade, 30),gdtree = gdh3n2,mode = "tree")
ggt = gendistprofiles(myclade, timechop(myclade,30), aln=aln, mode="alignment")
# things that ARE cumulative
# number of tips through time
d=divtt(myclade) ;
dg=divttgendist(timetree=myclade,gdtree=gdh3n2,maxdate=NULL) # tips through gen dist
p1=ggtree(myclade)
## plot things that are NOT cumulative
# lineages through time
n1=ggplot(data=lttinfo, aes(x=time,y=N))+geom_line()+ggtitle("Lineages through time") +
theme(plot.title = element_text(size = 8))
n2= ggplot(data.frame(glttd), aes(x=time, y=N))+geom_line()+
ggtitle("Lineages through gen dist pseudo-time") +
theme(plot.title = element_text(size = 8))
n3=ggplot(data=gg, aes(x=time, y=now.diversity))+geom_line()+
ggtitle("Gen. diversity in gd tree") +
theme(plot.title = element_text(size = 8))
n4=ggplot(data=ggt, aes(x=time, y=now.diversity))+geom_point()+
ggtitle("Gen. div in alignment") +
theme(plot.title = element_text(size = 8))
# things that are cumulative
c1=ggplot(data=d, aes(x=time, y=Nsplits))+geom_line()+ ggitle("Num tips through time") +
theme(plot.title = element_text(size = 8)) # tips through time
c2=ggplot(dg, aes(x=time,y=Ngdist))+geom_line()+
ggtitle("Lineages through genetic distance pseudo-time") +
theme(plot.title = element_text(size = 8))
c3=ggplot(data=gg, aes(x=time, y=cum.diversity))+geom_line()+
ggtitle("Cumulative gen diversity in gd tree") +
theme(plot.title = element_text(size = 8))
c4=ggplot(data=ggt, aes(x=time, y=cum.diversity))+geom_point()+
ggtitle("Cumulative gen diversity in alignment") +
theme(plot.title = element_text(size = 8))
# source("multiplot.R") # available in scater package which my computer says isn't avail for R 3.4.4
# so instead I have read it fro mthe disk as I have used it before
dev.set(2)
multiplot(p1,n1,n2,n3,n4, ncol=1)
dev.set(5)
multiplot(p1,c1,c2,c3,c4, ncol=1)
## have made all that into a function getManyPlots.R
H3N2_labels_clades_dates <- load_labels_clades_dates("Data/H3N2_labels_clades_dates.csv")
flu2014 <- truncate_tree_by_date(flu, H3N2_labels_clades_dates, as.Date("2014-02-28"))
comp2014 <- truncate_tree_by_date(flu, H3N2_labels_clades_dates, as.Date("2015-05-30"))
clades2014 <- pickClades(flu2014, 200, 400)
flu2015 <- truncate_tree_by_date(flu, H3N2_labels_clades_dates, as.Date("2015-02-28"))
comp2015 <- truncate_tree_by_date(flu, H3N2_labels_clades_dates, as.Date("2016-05-30"))
clades2015 <- pickClades(flu2015, 200, 400)
flu2016 <- truncate_tree_by_date(flu, H3N2_labels_clades_dates, as.Date("2016-02-28"))
comp2016 <- truncate_tree_by_date(flu, H3N2_labels_clades_dates, as.Date("2017-05-30"))
clades2016 <- pickClades(flu2016, 200, 400)
flu2017 <- truncate_tree_by_date(flu, H3N2_labels_clades_dates, as.Date("2017-02-28"))
comp2017 <- truncate_tree_by_date(flu, H3N2_labels_clades_dates, as.Date("2018-05-30"))
clades2017 <- pickClades(flu2017, 200, 400)
growth2014 <- growth_ratios_for_clades(flu2014, comp2014, clades2014)
growth2015 <- growth_ratios_for_clades(flu2015, comp2015, clades2015)
growth2016 <- growth_ratios_for_clades(flu2016, comp2016, clades2016)
growth2017 <- growth_ratios_for_clades(flu2017, comp2017, clades2017)
getManyPlots(extract.clade(flu2016, c2016$nodes[15]), aln, gdh3n2)