diff --git a/mid_res/rev/R1.1_rev.R b/mid_res/rev/R1.1_rev.R new file mode 100644 index 0000000..b8b1ed6 --- /dev/null +++ b/mid_res/rev/R1.1_rev.R @@ -0,0 +1,280 @@ +## rev +# R version 4.0.5 -> now switching to R -- +library(Seurat) +suppressMessages(library(here)) +suppressMessages(library(tidyverse)) +source(here('functions/MattPMutils.r')) +source(here('functions/scglmmr_functions/pseudobulk_helpers.r')) +source(here('functions/scglmmr_functions/enrichment_analysis.r')) +source(here('functions/scglmmr_functions/model_result_interaction.r')) +source(here('functions/scglmmr_functions/single_cell_gene_module_scores.r')) + +# save paths +figpath = here('mid_res/rev/rev.figs/');dir.create(figpath) + +################################## +# R 1 +################################## + + +# Sequencing saturation +d = read.csv(file = 'data/full_metadata/full_sample_metadata.txt',sep = '\t') +# saturation +sat = read.csv(file = here('mid_res/rev/rev.data/sequencing.saturation.txt'), header = TRUE, sep = '\t') + +p = ggplot(sat, aes(x = lane, y = sequencing.saturation.cDNA)) + + geom_bar(fill = 'deepskyblue3', stat = 'identity') + + scale_y_continuous(breaks = scales::pretty_breaks(n = 11), limits = c(0,100)) + + ylab('CITE-seq mRNA sequencing saturation %') + + xlab('10x Chromium lane') +ggsave(p, filename = paste0(figpath, 'read depth.pdf'), width = 4, height = 3) + + +# UMI per cell variaiton explained +# read single cell data object +s = readRDS(file = here('data/h1h5_annotated_with_meta.rds')) + +# use models from fig 3 as example +# scored +md = s@meta.data %>% + filter(cohort == 'H1N1') %>% + filter(time_cohort == 'd1') %>% + mutate(group_id = factor(adjmfc.time, levels = c("d0 low", "d1 low", "d0 high", "d1 high"))) %>% + mutate(subjectid = factor(sampleid)) %>% + mutate(sex = factor(gender)) %>% + mutate(scaled.age = as.numeric(scale(age))) %>% + mutate(celltype = celltype_joint) + +# add a covariate for number of cells per sample +ncell_met = md %>% group_by(sample) %>% summarize(ncells = n()) +md$ncell = plyr::mapvalues(x = md$sample, from = ncell_met$sample, to = ncell_met$ncells) +md$ncell = as.numeric(md$ncell) +md$log10ncell = log10(md$ncell) + +# subset normalized RNA data +norm.rna = s@data[ ,rownames(md)] + +# get leading edge genes from cur. baseline mods +g0.sub = readRDS(file = here('mid_res/baseline_response/dataV3/g0.sub.rds')) +li.g0 = LeadingEdgeIndexed(gsea.result.list = g0.sub, padj.threshold = 0.05) +li.g0 = base::Filter(length, li.g0) + +# metadata by cell type +cts = names(li.g0) +md = md %>% filter( celltype %in% cts ) +ct.md = split( md, f = md$celltype ) + +# module score for each cell type of specific baseline enriched leading edge genes. +mod_scores = list() +for (i in 1:length(ct.md)) { + + # init data for subset i + rna = norm.rna[ ,rownames(ct.md[[i]])] + mod.list = li.g0[[i]] + + # calculate single cell score for baseline-enriched module + mod_scores[[i]] = WeightedCellModuleScore( + gene_matrix = rna, + module_list = mod.list, + threshold = 0, + cellwise_scaling = TRUE, + return_weighted = FALSE + ) + + # add a "null" score of Gaussian noise as a reference + mod_scores[[i]]$null = rnorm(n = nrow(mod_scores[[i]]), mean = 0, sd = 1) +} +names(mod_scores) = names(ct.md) + + +dd2 = data.frame(module.score = c( + mod_scores$BC_Naive$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod_scores$CD14_Mono$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod_scores$CD16_Mono$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod_scores$CD4_CD25_Tcell$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod_scores$CD4_Efct_Mem_Tcell$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod_scores$CD8_CD161_Tcell$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod_scores$CD8_Mem_Tcell$`LI.M7.2 enriched in NK cells (I)`, + mod_scores$MAIT_Like$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod_scores$mDC$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod_scores$NK$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING), + rbind(ct.md$BC_Naive,ct.md$CD14_Mono, ct.md$CD16_Mono, ct.md$CD4_CD25_Tcell, ct.md$CD4_Efct_Mem_Tcell, + ct.md$CD8_CD161_Tcell, ct.md$CD8_Mem_Tcell, ct.md$MAIT_Like, ct.md$mDC, ct.md$NK) +) + + +p = ggplot(dd2, aes(x = nUMI, y = module.score )) + + theme_bw() + + geom_smooth(method = 'lm', color = 'purple') + + facet_wrap(~celltype_joint,nrow = 2)+ + ggpubr::stat_cor(method = "pearson", aes(label = ..r.label..)) + + geom_point(data = dd2, mapping = aes(x = nUMI, y = module.score), size = 0.4, alpha = 0.1) + + ylab('single cell module score') + + ggtitle('NORMALIZED data (as used in manuscript) - variance explained by nUMI - single cells') +p +ggsave(p, filename = paste0(figpath, 'norm.vpartumi.singlecell.png'),width = 9 , height = 4) + +# non-normalized +# subset normalized RNA data +raw.rna = s@raw.data[ ,rownames(md)] + +# module score for each cell type of specific baseline enriched leading edge genes. +mod.raw = list() +for (i in 1:length(ct.md)) { + + # init data for subset i + rna = raw.rna[ ,rownames(ct.md[[i]])] + mod.list = li.g0[[i]] + + # calculate single cell score for baseline-enriched module + mod.raw[[i]] = WeightedCellModuleScore( + gene_matrix = rna, + module_list = mod.list, + threshold = 0, + cellwise_scaling = TRUE, + return_weighted = FALSE + ) + + # add a "null" score of Gaussian noise as a reference + mod.raw[[i]]$null = rnorm(n = nrow(mod_scores[[i]]), mean = 0, sd = 1) +} +names(mod.raw) = names(ct.md) + +dd3 = data.frame(module.score = c( + mod.raw$BC_Naive$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod.raw$CD14_Mono$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod.raw$CD16_Mono$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod.raw$CD4_CD25_Tcell$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod.raw$CD4_Efct_Mem_Tcell$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod.raw$CD8_CD161_Tcell$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod.raw$CD8_Mem_Tcell$`LI.M7.2 enriched in NK cells (I)`, + mod.raw$MAIT_Like$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod.raw$mDC$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING, + mod.raw$NK$REACTOME_INTERFERON_ALPHA_BETA_SIGNALING), + rbind(ct.md$BC_Naive,ct.md$CD14_Mono, ct.md$CD16_Mono, ct.md$CD4_CD25_Tcell, ct.md$CD4_Efct_Mem_Tcell, + ct.md$CD8_CD161_Tcell, ct.md$CD8_Mem_Tcell, ct.md$MAIT_Like, ct.md$mDC, ct.md$NK) +) + + +p = ggplot(dd3, aes(x = nUMI, y = module.score )) + + theme_bw() + + geom_smooth(method = 'lm', color = 'red') + + facet_wrap(~celltype_joint, nrow = 2)+ + ggpubr::stat_cor(method = "pearson", aes(label = ..r.label..)) + + geom_point(data = dd3, mapping = aes(x = nUMI, y = module.score), size = 0.4, alpha = 0.1) + + ylab('single cell module score') + + ggtitle('NON NORMALIZED data - variance explained by nUMI - single cells') +p +ggsave(p, filename = paste0(figpath, 'non.norm.vpartumi.singlecell.png'), width = 9 , height = 4) + +# Mixed effects model effect size variance with and without covariates. +# data from original workflow +# mm res +mm1 = readRDS(file = here('../Flu_CITEseq_analysis/mid_res/baseline_response/dataV3/mm1.rds')) +mm2 = readRDS(file = here('../Flu_CITEseq_analysis/mid_res/baseline_response/dataV3/mm2.rds')) + +dd = data.frame(celltype = mm1$celltype, module = mm1$module, effect1 = mm1$estimatetime0_group2vs1, effect2 = mm2$estimatetime0_group2vs1) +p = ggplot(dd %>% filter(abs(effect1) > 0.1), aes(x = effect2, y = effect1) ) + + theme_bw() + + facet_wrap(~celltype, nrow = 2) + + geom_point(color = 'deepskyblue3') + + geom_vline(xintercept = 0) + + geom_hline(yintercept = 0) + + geom_abline(linetype = 'dashed') + + xlab('model 2 (multivariate) effect size: expression ~ response + cells per donor + age + sex') + + ylab('model 1 (unadjusted) effect size: expression ~ response') +ggsave(p, filename = paste0(figpath, 'effect.sizes.singlecell.module.score.pdf'), width = 9 , height = 4) + + +# calc n cells per individual per cell type +s@meta.data %>% + group_by(sample, celltype_joint) %>% + tally() %>% + ungroup() %>% + group_by(celltype_joint) %>% + summarize_at(.vars = 'n', .funs = c('median' = median, 'sd' = sd)) %>% + print(n=50) +# output +# celltype_joint median sd +# +# 1 BC_Mem 72.5 54.4 +# 2 BC_Naive 206. 193. +# 3 CD103_Tcell 12 9.83 +# 4 CD14_Mono 448. 331. +# 5 CD16_Mono 71.5 36.5 +# 6 CD38_Bcell 3 6.74 +# 7 CD4Naive_Tcell 539 256. +# 8 CD4_CD161_Mem_Tcell 124. 57.7 +# 9 CD4_CD25_Tcell 65.5 33.4 +# 10 CD4_CD56_Tcell 1 12.1 +# 11 CD4_CD57_Tcell 18 96.3 +# 12 CD4_Efct_Mem_Tcell 249 112. +# 13 CD8_CD161_Tcell 77 59.0 +# 14 CD8_Mem_Tcell 84 102. +# 15 CD8_NKT 3.5 60.0 +# 16 CD8_Naive_Tcell 294. 158. +# 17 DOUBLET 22 12.8 +# 18 HSC 5.5 3.35 +# 19 IgA_CD14_Mono 8 10.7 +# 20 MAIT_Like 39.5 77.3 +# 21 NK 160. 125. +# 22 mDC 29.5 20.7 +# 23 pDC 14 10.0 + +# make fig +cdat= s@meta.data %>% + group_by(sample, celltype_joint) %>% + tally() %>% + ungroup() %>% + group_by(celltype_joint) +csub = cdat %>% filter(celltype_joint %in% c('mDC', 'MAIT_Like', 'CD4_CD25_Tcell')) + +p = ggplot(csub, aes(x = celltype_joint, y = n )) + + theme_bw() + + geom_boxplot(outlier.shape = NA) + + geom_jitter(shape = 21, fill = 'deepskyblue3', alpha = 0.7) + + ggtitle('lower frequency cell types') + + ylab('number of cells (per sample - full cohort)') + + xlab('') +ggsave(p, filename = paste0(figpath, 'ncell.low.freq.pdf'), width = 4, height = 3) + +# n cells per mDC and plasmablast +s@meta.data %>% + group_by(sample) %>% + filter(celltype_joint == 'mDC') %>% + tally() %$% n %>% + quantile() +# 0% 25% 50% 75% 100% +# 2.0 19.5 29.5 43.5 80.0 + +s@meta.data %>% + group_by(sample, celltype_joint) %>% + summarize_at(.funs = 'tally', .vars = 'celltype_joint') %>% + tally %$% n %>% + quantile() +quantile +filter(celltype_joint == 'mDC') %>% + tally() %$% n %>% + quantile() + +s@meta.data %>% + group_by(sample) %>% + filter(celltype_joint == 'CD38_Bcell') %>% + tally() %$% n %>% + quantile() +# 0% 25% 50% 75% 100% +# 1 2 3 5 34 + + + + + + + + + + + + + + \ No newline at end of file diff --git a/mid_res/rev/R1.2_rev_power.R b/mid_res/rev/R1.2_rev_power.R new file mode 100644 index 0000000..5dcbf88 --- /dev/null +++ b/mid_res/rev/R1.2_rev_power.R @@ -0,0 +1,210 @@ +# 4.2 +suppressMessages(library(here)) +suppressMessages(library(tidyverse)) +suppressMessages(library(simr)) +source(here('functions/MattPMutils.r')) +source(here('functions/scglmmr_functions/model_result_interaction.r')) + + +# save paths +figpath = here('mid_res/rev/rev.figs/');dir.create(figpath) +datapath = here('mid_res/rev/rev.data/');dir.create(datapath) +s = readRDS(file = here('data/h1h5_annotated_with_meta.rds')) +s@meta.data %>% + group_by(sample, celltype_joint) %>% + tally() %>% + filter(celltype_joint %in% c( "CD14_Mono", "mDC", "BC_Naive")) %>% + group_by(celltype_joint) %>% + summarize_at(.vars = 'n',.funs = median) + median(n) + + + # A tibble: 3 × 2 + # celltype_joint n + # + # 1 BC_Naive 206. + # 2 CD14_Mono 448. + # 3 mDC 29.5 +rm(s); gc() + +# load contrast samplemd +samplemd = readRDS(file = here('mid_res/3_H1N1_H5N1_joint_pseudobulk_DE/dataV4/samplemd12.rds')) +pb = readRDS(file = here('mid_res/3_H1N1_H5N1_joint_pseudobulk_DE/dataV4/pb12.rds')) + +# designmat for edgeR normalization +met = samplemd[ ,c('gender', 'scaledage', 'time.group')] +mat = model.matrix( ~ 0 + time.group + gender + scaledage, data = met) +betas = readRDS(file = here('mid_res/3_H1N1_H5N1_joint_pseudobulk_DE/dataV4/fit12e.rds')) +betas$CD14_Mono$coefficients +deltas = ExtractResult(model.fit.list = betas,what = 'lmer.z.ranks',coefficient.number = 1, 'delta') + + +# rank by absolute effect size +deltas = lapply(deltas, function(x) x %>% abs() %>% sort(decreasing = TRUE)) + +# pb data +power.calc = list() +names(pb) +pb = pb[c("CD14_Mono", "mDC", "BC_Naive")] +deltas = deltas[c( "CD14_Mono", "mDC", "BC_Naive")] + +# specify contrast +c00 = c(1,0,0,0) +c01 = c(0,1,0,0) +c10 = c(0,0,1,0) +c11 = c(0,0,0,1) +contrast_list = list( + "time1vs0_group2vs1" = (c01 - c00) - (c11 - c10) +) + +# write a custom function for simr to extract FC delta p values from marginal means contrast +test_delta <- function(m1, ...) { + # m1 to be the lme4 fit per celltype & gene + contrast_fit = emmeans::emmeans(m1, specs = ~time.group) %>% + emmeans::contrast(method = contrast_list) %>% + summary() + contrast_fit$p.value +} +attr(test_delta, "text") <- function(...) NULL + +power.calc = list() +fit.list = list() +for (i in 1:length(pb)) { + + ## process data + d= edgeR::DGEList(counts = pb[[i]], samples = samplemd) + gtable = edgeR::filterByExpr(y = d$counts, min.count = 3, design = mat) + print(names(pb)[i]);print(table(gtable)) + d = d[gtable, keep.lib.sizes=FALSE] + d = edgeR::calcNormFactors(object = d) + cpm = edgeR::cpm(d, log = TRUE) + + + #select 200 genes + top.genes = names(deltas[[i]])[1:200] # can tune + cpm.top = cpm[top.genes, ] + + + # contrast mixed model + f1 <- gene ~ 0 + time.group + gender + scaledage + (1|subjectid) + + fit.list = list() + for (u in 1:length(top.genes)) { + + # fit a single gene + gene.name = top.genes[u] + dat = data.frame(gene = cpm[top.genes[u],], samplemd) + m1 = lme4::lmer(formula = f1, data = dat) + + # simulate data based on model + ps = powerSim(fit = m1, test = test_delta, seed = 1990, nsim = 50) + + # custom function output is not captured; grab it + ps.res = capture.output(ps,type = 'output') + + # reformat and reformat again outside + pow = gsub(" ", "", ps.res[2]) + + # get model stuff + random_effect_var = insight::get_variance_random(m1, tolerance = 0) + residual_var = insight::get_variance_residual(m1) + + + # store results + fit.list[[u]] = data.frame( + celltype = names(pb)[i], + gene = gene.name, + lmer.z.statistic = deltas[[i]][[u]], + power.sim = pow, + random_effect_var, + residual_var + ) + + } + power.calc[[i]] = bind_rows(fit.list) +} +# names(power.calc) = names(pb) +power.data = bind_rows(power.calc) +saveRDS(power.data,file = paste0(datapath, 'power.data.rds')) +power.data= readRDS(file = here('mid_res/rev/rev.data/power.data.rds')) + +# reformat annoying powersim captured output from the custom function. +extract.power.ci = function(x) { + # Remove "%" + x <- gsub("%", "", x) + + # Extract numbers between "(" and "," + left_paren <- gregexpr("\\(", x)[[1]] + comma <- gregexpr(",", x)[[1]] + power.lower <- as.numeric(substr(x, left_paren + 1, comma - 1)) + + # Extract numbers between "," and ")" + right_paren <- gregexpr("\\)", x)[[1]] + power.upper <- as.numeric(substr(x, comma + 1, right_paren - 1)) + + # Extract numbers before "%" + x <- gsub("%", "", power.data$power.sim) + power = as.numeric(sub("\\(.*", "", x)) + + return(data.frame(power, power.lower, power.upper)) +} + +# Applying the function to the dataframe +power.df <- cbind(power.data, extract.power.ci(power.data$power.sim)) +saveRDS(power.df,file = paste0(datapath, 'power.df.rds')) +power.df = readRDS(file = here('mid_res/rev/rev.data/power.df.rds')) + +# plot of power analysis +p = ggplot(power.df, aes(x = reorder(gene, power), y = power, color = celltype))+ + theme_minimal() + + xlab('gene') + + geom_point(size = 1, show.legend = FALSE) + + geom_errorbar(aes(ymax = power.upper, ymin = power.lower), lwd = 0.1, show.legend = FALSE) + + theme(axis.text.x = element_blank()) + + theme(axis.ticks.x = element_blank()) + + facet_wrap(~celltype) + + ggsci::scale_color_d3() + +ggsave(p,filename = paste0(figpath, 'power.analysis.contrastmodel.pdf'), width = 8, height = 4) +power.df %>% filter(power > 70) %>% group_by(celltype) %>% tally() +# # A tibble: 3 × 2 +# 1 BC_Naive 77 +# 2 CD14_Mono 155 +# 3 mDC 72 + +p = +ggplot(power.df, aes(x = lmer.z.statistic, y = power)) + + theme_bw() + + geom_errorbar(aes(ymax = power.upper, ymin = power.lower), + lwd = 0.2, alpha = 0.5, show.legend = FALSE) + + geom_point(size = 1, show.legend = FALSE, alpha = 0.4) + + facet_wrap(~celltype, scales = 'free') + + ggsci::scale_color_d3() + + xlab('effect size') +p +ggsave(p,filename = paste0(figpath, 'power.analysis.contrastmodel.effectsize.pdf'), width = 10, height = 4) + +p = +ggplot(power.df, aes(x = power)) + + theme_bw() + + facet_wrap(~celltype, scales = 'free') + + geom_histogram(fill = 'deepskyblue3', color = 'black', bins = 12) +ggsave(p,filename = paste0(figpath, 'power.analysis.contrastmodel.distribution.pdf'), width = 10, height = 4) + + +p = + ggplot(power.df, aes(x = random_effect_var, y = power)) + + theme_minimal() + + geom_point(size = 1, show.legend = FALSE) + + facet_wrap(~celltype) + + ggsci::scale_color_d3() +ggsave(p,filename = paste0(figpath, 'power.analysis.contrastmodel.ranef.pdf'), width = 8, height = 4) + + +p = + ggplot(power.df, aes(x = residual_var, y = power)) + + theme_minimal() + + geom_point(size = 1, show.legend = FALSE) + + facet_wrap(~celltype) + + ggsci::scale_color_d3() +ggsave(p,filename = paste0(figpath, 'power.analysis.contrastmodel.resid.pdf'), width = 8, height = 4) diff --git a/mid_res/rev/R2.R b/mid_res/rev/R2.R new file mode 100644 index 0000000..dd23b70 --- /dev/null +++ b/mid_res/rev/R2.R @@ -0,0 +1,256 @@ +## rev +# R version 4.0.5 -> now switching to R -- +library(Seurat) +suppressMessages(library(here)) +suppressMessages(library(tidyverse)) +source(here('functions/MattPMutils.r')) +source(here('functions/scglmmr_functions/pseudobulk_helpers.r')) +source(here('functions/scglmmr_functions/enrichment_analysis.r')) +source(here('functions/scglmmr_functions/model_result_interaction.r')) +source(here('functions/scglmmr_functions/single_cell_gene_module_scores.r')) + +# save paths +figpath = here('mid_res/rev/rev.figs/');dir.create(figpath) +datapath = here('mid_res/rev/rev.data/') + + +# read single cell data object +s = readRDS(file = here('data/h1h5_annotated_with_meta.rds')) + +################################## +# R 2 +################################## + +# n cells cd4 mem tfh calc +med.cd4mem = s@meta.data %>% + group_by(sample, celltype_joint) %>% + tally() %>% + filter(celltype_joint == 'CD4_Efct_Mem_Tcell') %$% + n %>% + median() +est.ctfh = 0.2*0.1*med.cd4mem +est.ctfh +# [1] 4.98 + + + + +# AS03 theme +cu = c("grey48", "grey", "grey48", "deepskyblue3") +cu.alpha = sapply(cu, col.alpha, alpha = 0.4) %>% unname() +mtheme = list( + geom_boxplot(show.legend = FALSE, outlier.shape = NA), + theme_bw(base_size = 10.5), + theme(axis.text.x=element_text(angle = -90, hjust = 0)), + theme(strip.background = element_blank(), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + strip.text = element_text(face = "bold",family = "Helvetica"), + axis.text.y = element_text(size = 6), + axis.title.y = element_text(size = 10)) +) + + +# baseline bulk theme +cu1 = sapply(c('dodgerblue','grey', 'red'), col.alpha, 0.2) %>% unname() +cu2 = c('dodgerblue','grey', 'red') +bulktheme = list( + mtheme, + theme(axis.text.x=element_text(angle = 0, hjust = 0.5, color = 'black', size = 10), + axis.title.y = element_text(size = 14)), + geom_boxplot(show.legend = FALSE, outlier.shape = 21), + scale_y_continuous( breaks= scales::pretty_breaks(), expand = c(0.15,0)), + scale_x_discrete(labels = c("low", "mid", "high")), + xlab("Antibody Response"), + scale_fill_manual(values = cu1), + scale_color_manual(values = cu2) +) +my_compare = list(l1 = c("1", "0"), l2 = c("2", "0")) +my_compare_2 = list( l2 = c("Low.F", "Low.M")) + +# define adjuvant signatures +# leading edge combined signature +# upregulated gnees only +# mono and mDC specific and combined genes +mono.validated = readRDS(file = here('mid_res/nat_adj/generated_data/V4/mono.as03.sig.validated.rds')) +dc.validated = readRDS(file = here('mid_res/nat_adj/generated_data/V4/dc.as03.sig.validated.rds')) +adjuvant.signatures = c(list('Mono_AS03_validated' = mono.validated), list('DC_AS03_validated' = dc.validated)) + +# load average baseline high and low responders from unadjuvanted cohort +av0 = readRDS(file = here('mid_res/nat_adj/generated_data/V4/av0.rds')) + + +############################# +# CD14 Monocytes +############################# +# Monocyte combined AS03 signature average across time between groups +#natural adjuvant test +mono.sig.av2 = + av0$CD14_Mono %>% + filter(gene %in% adjuvant.signatures$Mono_AS03_validated) %>% + group_by(sample, response) %>% + summarize(meansig = mean(count)) + +# append sex and age +meta=read.csv(file = here('data/full_metadata/full_sample_metadata.txt'),sep = '\t') + +# append subject id +mono.sig.av2 = mono.sig.av2 %>% mutate(subjectid = str_sub(sample, 1,3)) + +# mapo sex and age to average data based on metadata match +mono.sig.av2$sex = plyr::mapvalues(x = mono.sig.av2$subjectid,from = meta$subjectid,to = meta$gender) +mono.sig.av2$age = plyr::mapvalues(x = mono.sig.av2$subjectid,from = meta$subjectid,to = meta$age) +mono.sig.av2$age = as.numeric(mono.sig.av2$age) +#mono.sig.av2$response.sex = paste(mono.sig.av2$response, mono.sig.av2$sex,sep = '.') + +p = ggplot(mono.sig.av2, aes(x = sex, y = meansig, fill = sex , color = sex)) + + theme_bw() + + geom_boxplot() + + ggsci::scale_color_d3() + + ggsci::scale_fill_d3(alpha = 0.5) + + ggpubr::stat_compare_means(paired = FALSE, show.legend = FALSE, method = 'wilcox') + + geom_jitter() + + ylab('Monocyte AS03 Adjuvant Signature') + + ggtitle('Baseline') + + theme(legend.position = 'none') + + xlab('sex') +p +ggsave(p, filename = paste0(figpath, 'MONO.baseline.naturaladjuvant.sex.pdf'), width = 2.1, height = 4) + + +## Repeat for mDC +mdc.sig.av = + av0$mDC %>% + filter(gene %in% adjuvant.signatures$DC_AS03_validated) %>% + group_by(sample, response) %>% + summarize(meansig = mean(count)) + +# append sex and age +# meta=read.csv(file = here('data/full_metadata/full_sample_metadata.txt'),sep = '\t') # done above + +# append subject id +mdc.sig.av = mdc.sig.av %>% mutate(subjectid = str_sub(sample, 1,3)) + +# mapo sex and age to average data based on metadata match +mdc.sig.av$sex = plyr::mapvalues(x = mdc.sig.av$subjectid,from = meta$subjectid,to = meta$gender) +mdc.sig.av$age = plyr::mapvalues(x = mdc.sig.av$subjectid,from = meta$subjectid,to = meta$age) +mdc.sig.av$age = as.numeric(mdc.sig.av$age) +#mdc.sig.av$response.sex = paste(mdc.sig.av$response, mdc.sig.av$sex,sep = '.') + + +#plot +p = ggplot(mdc.sig.av, aes(x = sex, y = meansig, fill = sex , color = sex)) + + theme_bw() + + geom_boxplot() + + ggsci::scale_color_d3() + + ggsci::scale_fill_d3(alpha = 0.5) + + ggpubr::stat_compare_means(paired = FALSE, show.legend = FALSE, method = 'wilcox') + + geom_jitter() + + ylab('mDC AS03 Adjuvant Signature') + + ggtitle('Baseline') + + theme(legend.position = 'none') + + xlab('sex') +p +ggsave(p, filename = paste0(figpath, 'mDC.baseline.naturaladjuvant.sex.pdf'), width = 2.1, height = 4) + + +################## +# Age effects +################## + +# age vs nat adj signature +p = ggplot(mono.sig.av2, aes(x = age, y = meansig )) + + theme_bw() + + geom_point() + + geom_smooth(method = 'lm') + + facet_wrap(~response) + + ggpubr::stat_cor(method = 'spearman') + + ylab('CD14 Mono Natural adjuvant signature') + + ggtitle('CD14 monocytes baseline unadjuvanted subjects') +p +ggsave(p, filename = paste0(figpath, 'mono.natadj.vs.age.pdf'), width = 6, height = 3) + +p2 = ggplot(mono.sig.av2, aes(x = age, y = meansig)) + + theme_bw() + + geom_smooth(method = 'lm') + + ggpubr::stat_cor(method = 'spearman') + + geom_point(aes(x = age, y = meansig, color = response)) + + scale_color_manual(values = c('dodgerblue', 'red'))+ + ylab('CD14 Mono Natural adjuvant signature') + + ggtitle('CD14 monocytes ') +p2 +p3 = cowplot::plot_grid(p, p2,rel_widths = c(1.3,1)) +ggsave(p3, filename = paste0(figpath, 'mono.natadj.vs.age.combined.pdf'), width = 9, height = 3) + +## +p = ggplot(mdc.sig.av, aes(x = age, y = meansig )) + + theme_bw() + + geom_point() + + geom_smooth(method = 'lm') + + facet_wrap(~response) + + ggpubr::stat_cor(method = 'spearman') + + ylab('mDC Natural adjuvant signature') + + ggtitle('mDC monocytes baseline unadjuvanted subjects') +ggsave(p, filename = paste0(figpath, 'mDC.natadj.vs.age.pdf'), width = 6, height = 3) + +p2 = ggplot(mdc.sig.av, aes(x = age, y = meansig)) + + theme_bw() + + geom_smooth(method = 'lm') + + ggpubr::stat_cor(method = 'spearman') + + geom_point(aes(x = age, y = meansig, color = response)) + + scale_color_manual(values = c('dodgerblue', 'red'))+ + ylab('mdc Natural adjuvant signature') + + ggtitle('CD14 monocytes ') +p2 +p3 = cowplot::plot_grid(p, p2,rel_widths = c(1.3,1)) +ggsave(p3, filename = paste0(figpath, 'mdc.natadj.vs.age.combined.pdf'), width = 9, height = 3) + + + +## Variance partition analysis +# age vs age-related signatures From Figure 1f within CD8 naive CD161 T cells and CD8 naive T cells +age.pos.cd161 = readRDS(file = here('mid_res/variance_partition/generated_data2/age.pos.cd161.rds')) +age.pos.cd8n = readRDS(file = here('mid_res/variance_partition/generated_data2/age.pos.cd8n.rds')) + +# CD161 age associated signature stratified by response +cd161av = + av0$CD8_CD161_Tcell %>% + filter(gene %in% age.pos.cd161) %>% + group_by(sample, response) %>% + summarize(meansig = mean(count)) +cd161av = cd161av %>% mutate(subjectid = str_sub(sample, 1,3)) +cd161av$age = plyr::mapvalues(x = cd161av$subjectid,from = meta$subjectid,to = meta$age) +cd161av$age = as.numeric(cd161av$age) + +# analyze correlation stratified by response +p = ggplot(cd161av, aes(x = age, y = meansig )) + + theme_bw() + + geom_point() + + geom_smooth(method = 'lm') + + facet_wrap(~response) + + ggpubr::stat_cor(method = 'spearman') + + ylab('CD8+CD161+ T Age-assoc. gene average') + + ggtitle('CD8 CD161 T cell baseline unadjuvanted subjects') +ggsave(p, filename = paste0(figpath, 'CD161agesig.vs.response.pdf'), width = 6, height = 3) + +cd8nav = + av0$CD8_Naive_Tcell %>% + filter(gene %in% age.pos.cd8n) %>% + group_by(sample, response) %>% + summarize(meansig = mean(count)) +cd8nav = cd8nav %>% mutate(subjectid = str_sub(sample, 1,3)) +cd8nav$age = plyr::mapvalues(x = cd8nav$subjectid,from = meta$subjectid,to = meta$age) +cd8nav$age = as.numeric(cd8nav$age) + +# analyze correlation stratified by response +p = ggplot(cd8nav, aes(x = age, y = meansig )) + + theme_bw() + + geom_point() + + geom_smooth(method = 'lm') + + facet_wrap(~response) + + ggpubr::stat_cor(method = 'spearman') + + ylab('CD8 Naive T Age-assoc. gene average') + + ggtitle('CD8 Naive T cell baseline unadjuvanted subjects') +p +ggsave(p, filename = paste0(figpath, 'CD8naive.agesig.vs.response.pdf'), width = 6, height = 3) +# diff --git a/mid_res/rev/rev.figs/CD161agesig.vs.response.pdf b/mid_res/rev/rev.figs/CD161agesig.vs.response.pdf new file mode 100644 index 0000000..08c6edf Binary files /dev/null and b/mid_res/rev/rev.figs/CD161agesig.vs.response.pdf differ diff --git a/mid_res/rev/rev.figs/CD8naive.agesig.vs.response.pdf b/mid_res/rev/rev.figs/CD8naive.agesig.vs.response.pdf new file mode 100644 index 0000000..0ac1b5c Binary files /dev/null and b/mid_res/rev/rev.figs/CD8naive.agesig.vs.response.pdf differ diff --git a/mid_res/rev/rev.figs/MONO.baseline.naturaladjuvant.sex.pdf b/mid_res/rev/rev.figs/MONO.baseline.naturaladjuvant.sex.pdf new file mode 100644 index 0000000..48f0242 Binary files /dev/null and b/mid_res/rev/rev.figs/MONO.baseline.naturaladjuvant.sex.pdf differ diff --git a/mid_res/rev/rev.figs/effect.sizes.singlecell.module.score.pdf b/mid_res/rev/rev.figs/effect.sizes.singlecell.module.score.pdf new file mode 100644 index 0000000..eee4974 Binary files /dev/null and b/mid_res/rev/rev.figs/effect.sizes.singlecell.module.score.pdf differ diff --git a/mid_res/rev/rev.figs/mDC.baseline.naturaladjuvant.sex.pdf b/mid_res/rev/rev.figs/mDC.baseline.naturaladjuvant.sex.pdf new file mode 100644 index 0000000..92a3e4c Binary files /dev/null and b/mid_res/rev/rev.figs/mDC.baseline.naturaladjuvant.sex.pdf differ diff --git a/mid_res/rev/rev.figs/mdc.natadj.vs.age.combined.pdf b/mid_res/rev/rev.figs/mdc.natadj.vs.age.combined.pdf new file mode 100644 index 0000000..981e2b5 Binary files /dev/null and b/mid_res/rev/rev.figs/mdc.natadj.vs.age.combined.pdf differ diff --git a/mid_res/rev/rev.figs/mono.natadj.vs.age.combined.pdf b/mid_res/rev/rev.figs/mono.natadj.vs.age.combined.pdf new file mode 100644 index 0000000..d436966 Binary files /dev/null and b/mid_res/rev/rev.figs/mono.natadj.vs.age.combined.pdf differ diff --git a/mid_res/rev/rev.figs/ncell.low.freq.pdf b/mid_res/rev/rev.figs/ncell.low.freq.pdf new file mode 100644 index 0000000..eef95a5 Binary files /dev/null and b/mid_res/rev/rev.figs/ncell.low.freq.pdf differ diff --git a/mid_res/rev/rev.figs/power.analysis.contrastmodel.distribution.pdf b/mid_res/rev/rev.figs/power.analysis.contrastmodel.distribution.pdf new file mode 100644 index 0000000..6380928 Binary files /dev/null and b/mid_res/rev/rev.figs/power.analysis.contrastmodel.distribution.pdf differ diff --git a/mid_res/rev/rev.figs/power.analysis.contrastmodel.effectsize.pdf b/mid_res/rev/rev.figs/power.analysis.contrastmodel.effectsize.pdf new file mode 100644 index 0000000..92bbe71 Binary files /dev/null and b/mid_res/rev/rev.figs/power.analysis.contrastmodel.effectsize.pdf differ diff --git a/mid_res/rev/rev.figs/read depth.pdf b/mid_res/rev/rev.figs/read depth.pdf new file mode 100644 index 0000000..0d53e8b Binary files /dev/null and b/mid_res/rev/rev.figs/read depth.pdf differ diff --git a/mid_res/rev/tfh/1.tfh.baseline.R b/mid_res/rev/tfh/1.tfh.baseline.R new file mode 100644 index 0000000..c5fb35f --- /dev/null +++ b/mid_res/rev/tfh/1.tfh.baseline.R @@ -0,0 +1,98 @@ +suppressMessages(library(tidyverse)) +suppressMessages(library(here)) + +source(here('functions/MattPMutils.r')) + +figpath = here('mid_res/rev/tfh/figures/'); dir.create(figpath) +datapath = here('mid_res/rev/tfh/generated_data/'); dir.create(datapath) + +titer = read.csv(file = here('data/CHI_H1N1_data/titer/titer_processed.txt'),header = TRUE,sep = '\t') +titer$Subject = as.character(titer$Subject) + +tester= read.csv(file = here('mid_res/rev/tfh/T4.exported.txt'),header= TRUE,sep = '\t') +colnames(tester) + +# function provided by Mani +parse.T4 <- function(day, controls=F, counts=F) +{ + hdr = read.delim(here("mid_res/rev/tfh/T4.exported.txt"), stringsAsFactors=F, header=F, nrows=1) + a = read.delim(here("mid_res/rev/tfh/T4.exported.txt"), stringsAsFactors=F) + stopifnot(grepl(".*T4 H1N1-", a$Sample), length(hdr)==ncol(a)) + if (!controls) { a = a[!grepl("CHI-007",a$Sample),]; } else { a$Sample = gsub("CHI-007","CHI007",a$Sample); } + + # get the Tfh1 and Tfh1/Tfh17 cell popns, along with their CD278+PD1+ subsets. + cidx = c(IDhlg = which(hdr == "Lymphocyte/single cells/Alive cells,Count"), #higher-level gate in the hierarchy + ID242 = grep("Tfh1,Count$",hdr), + ID244 = grep("Tfh1\\/.*CD278\\+.*PD\\-1\\+,Count$",hdr), + ID247 = grep("Tfh1 Tfh17,Count$",hdr), + ID249 = grep("Tfh1 Tfh17\\/.*CD278\\+.*PD\\-1\\+,Count$",hdr)) + stopifnot(length(cidx)==5) + print(cbind(names(cidx), t(hdr[cidx]))) + stopifnot(make.names(hdr)[cidx] == colnames(a)[cidx]) + + a$subject = do.call(rbind, strsplit(a$Sample, "-"))[,2] + a$day = gsub("day ", "day", do.call(rbind, strsplit(a$Sample, "-"))[,3]) + print(table(a$day)) + if (!controls) { a = a[a$day==day,]; } else { a = a[a$subject=="CHI007",]; } + print(table(a$day)) + print(table(a$subject)) + + if (counts) { a[,cidx['IDhlg']]=1; } + stopifnot(names(cidx)[1]=="IDhlg") + b = matrix(NA, nrow(a), length(cidx)-1, dimnames=list(paste0("X",a$subject,ifelse(grepl("^day",a$day),"",paste0(".",a$day))), names(cidx)[-1])) + for (i in names(cidx)[-1]) + { + b[,i] = a[,cidx[i]]/a[,cidx['IDhlg']] + } + stopifnot(b >= 0, all(b <= 1) || counts) + b = t(b) + #write.table(file='tmp.T4.day0.txt', b, quote=F, sep="\t") + b +} +d = parse.T4(day = 'day0') +d2 = d %>% + as.data.frame() %>% + t() %>% + as.data.frame() %>% + rownames_to_column('subject') %>% + mutate(subject = as.character(str_sub(subject, 2,4))) %>% + filter(subject %in% titer$Subject) %>% + mutate(adjmfc = plyr::mapvalues(x = subject, from = titer$Subject,to = titer$adjMFC_class)) + +set.seed(1992) +d3 = d2 %>% + filter(!adjmfc=='NaN') %>% + filter(adjmfc %in% c(0,2)) %>% + rename('AdjMFC' = adjmfc) +d3$AdjMFC[d3$AdjMFC == 0] <- 'low' +d3$AdjMFC[d3$AdjMFC == 2] <- 'high' +d3$AdjMFC= factor(d3$AdjMFC, levels = c('low','high')) +cu =c(col.alpha('dodgerblue',0.8),col.alpha('red', 0.8)) + +# TFH1 +p = ggplot(d3, aes(x = AdjMFC, y = ID242, fill = AdjMFC) )+ + theme_bw() + + geom_violin(show.legend = FALSE, trim = FALSE) + + geom_jitter(show.legend = FALSE, width = 0.3) + + ggpubr::stat_compare_means(method = 'wilcox',paired = FALSE) + + scale_fill_manual(values =cu) + + ylab('TFH percent of lymphocytes') + +# TFHPd1 +p2 = ggplot(d3, aes(x = AdjMFC, y = ID244, fill = AdjMFC) )+ + theme_bw() + + geom_violin(show.legend = FALSE,trim = FALSE) + + geom_jitter(show.legend = FALSE, width = 0.3) + + ggpubr::stat_compare_means(method = 'wilcox',paired = FALSE) + + scale_fill_manual(values =cu) + + ylab('PD1+ TFH percent of lymphocytes') + +# combine +p3 = cowplot::plot_grid(p,p2, nrow = 1) + +# save +ggsave(p3,filename = paste0(figpath,'tfh.baseline.pdf'), width = 7, height = 3) + +# save data +saveRDS(object = d,file = paste0(datapath,'d.rds')) + diff --git a/mid_res/rev/tfh/1.tfh.d7.R b/mid_res/rev/tfh/1.tfh.d7.R new file mode 100644 index 0000000..1197409 --- /dev/null +++ b/mid_res/rev/tfh/1.tfh.d7.R @@ -0,0 +1,292 @@ +suppressMessages(library(tidyverse)) +suppressMessages(library(here)) +source(here('functions/MattPMutils.r')) + +figpath = here('mid_res/rev/tfh/figures_7/'); dir.create(figpath) +datapath = here('mid_res/rev/tfh/generated_data_7/'); dir.create(datapath) + +titer = read.csv(file = here('data/CHI_H1N1_data/titer/titer_processed.txt'),header = TRUE,sep = '\t') +titer$Subject = as.character(titer$Subject) + +tester= read.csv(file = here('mid_res/rev/tfh/T4.exported.txt'),header= TRUE,sep = '\t') +colnames(tester) + +# function provided by Mani. +# modify in steps below to load multi timepoints + +# parse.T4 <- function(day, controls=F, counts=F) +# { +# hdr = read.delim(here("mid_res/rev/tfh/T4.exported.txt"), stringsAsFactors=F, header=F, nrows=1) +# a = read.delim(here("mid_res/rev/tfh/T4.exported.txt"), stringsAsFactors=F) +# stopifnot(grepl(".*T4 H1N1-", a$Sample), length(hdr)==ncol(a)) +# if (!controls) { a = a[!grepl("CHI-007",a$Sample),]; } else { a$Sample = gsub("CHI-007","CHI007",a$Sample); } +# +# # get the Tfh1 and Tfh1/Tfh17 cell popns, along with their CD278+PD1+ subsets. +# cidx = c(IDhlg = which(hdr == "Lymphocyte/single cells/Alive cells,Count"), #higher-level gate in the hierarchy +# ID242 = grep("Tfh1,Count$",hdr), +# ID244 = grep("Tfh1\\/.*CD278\\+.*PD\\-1\\+,Count$",hdr), +# ID247 = grep("Tfh1 Tfh17,Count$",hdr), +# ID249 = grep("Tfh1 Tfh17\\/.*CD278\\+.*PD\\-1\\+,Count$",hdr)) +# stopifnot(length(cidx)==5) +# print(cbind(names(cidx), t(hdr[cidx]))) +# stopifnot(make.names(hdr)[cidx] == colnames(a)[cidx]) +# +# a$subject = do.call(rbind, strsplit(a$Sample, "-"))[,2] +# a$day = gsub("day ", "day", do.call(rbind, strsplit(a$Sample, "-"))[,3]) +# print(table(a$day)) +# if (!controls) { a = a[a$day==day,]; } else { a = a[a$subject=="CHI007",]; } +# print(table(a$day)) +# print(table(a$subject)) +# +# if (counts) { a[,cidx['IDhlg']]=1; } +# stopifnot(names(cidx)[1]=="IDhlg") +# b = matrix(NA, nrow(a), length(cidx)-1, +# dimnames= list( +# paste0("X",a$subject,ifelse(grepl("^day",a$day),"",paste0(".",a$day))), +# names(cidx)[-1] +# ) +# ) +# for (i in names(cidx)[-1]) +# { +# b[,i] = a[,cidx[i]]/a[,cidx['IDhlg']] +# } +# stopifnot(b >= 0, all(b <= 1) || counts) +# b = t(b) +# #write.table(file='tmp.T4.day0.txt', b, quote=F, sep="\t") +# b +# } + + +# modify to look at day 0 and 7 to calc fc +#parse.T4 <- function(day, controls=F, counts=F){ +hdr = read.delim( + here("mid_res/rev/tfh/T4.exported.txt"), + stringsAsFactors = F, + header = F, + nrows = 1 +) +a = read.delim(here("mid_res/rev/tfh/T4.exported.txt"), stringsAsFactors = FALSE) +stopifnot(grepl(".*T4 H1N1-", a$Sample), length(hdr) == ncol(a)) +# if (!controls) { +a = a[!grepl("CHI-007", a$Sample), ] +# } else { +# a$Sample = gsub("CHI-007", "CHI007", a$Sample) +# } + +# get the Tfh1 and Tfh1/Tfh17 cell popns, along with their CD278+PD1+ subsets. +cidx = c( + IDhlg = which(hdr == "Lymphocyte/single cells/Alive cells,Count"), + #higher-level gate in the hierarchy + ID242 = grep("Tfh1,Count$", hdr), + ID244 = grep("Tfh1\\/.*CD278\\+.*PD\\-1\\+,Count$", hdr), + ID247 = grep("Tfh1 Tfh17,Count$", hdr), + ID249 = grep("Tfh1 Tfh17\\/.*CD278\\+.*PD\\-1\\+,Count$", hdr) +) +stopifnot(length(cidx) == 5) +print(cbind(names(cidx), t(hdr[cidx]))) +stopifnot(make.names(hdr)[cidx] == colnames(a)[cidx]) + +a$subject = do.call(rbind, strsplit(a$Sample, "-"))[, 2] +a$day = gsub("day ", "day", do.call(rbind, strsplit(a$Sample, "-"))[, 3]) +print(table(a$day)) + +#if (!controls) { +# a = a[a$day == day, ] +a = a[a$day %in% c('day0', 'day7'), ] +# } else { +# a = a[a$subject == "CHI007", ] +# } +print(table(a$day)) +print(table(a$subject)) + +# if (counts) { +# a[, cidx['IDhlg']] = 1 +# } +stopifnot(names(cidx)[1] == "IDhlg") + +# Append days onto sample names +b = matrix(NA, nrow(a), length(cidx)-1, + dimnames= list( + #paste0("X",a$subject,ifelse(grepl("^day",a$day),"",paste0(".",a$day))), + paste0(a$subject,a$day), + names(cidx)[-1] + ) +) + +# calc fc matrix as in original function +for (i in names(cidx)[-1]){ + b[, i] = a[, cidx[i]] / a[, cidx['IDhlg']] +} +stopifnot(b >= 0, all(b <= 1) || counts) + +# make dataframe +bd = as.data.frame(b) +bd = bd %>% rownames_to_column('sample') +fc = bd %>% + pivot_longer(!sample, names_to = 'population', values_to = 'freq') %>% + mutate(day = str_sub(sample, -4,-1)) %>% + arrange(population) %>% + mutate(fold_change = lead(freq) / freq) %>% # non log FC Data. + filter(day == 'day0') %>% + select(-day) %>% + mutate(subject = str_sub(sample,1,3)) %>% + filter(subject %in% titer$Subject) %>% + mutate(AdjMFC = plyr::mapvalues(x = subject, from = titer$Subject,to = titer$adjMFC_class)) + +fc$AdjMFC[fc$AdjMFC == '0'] <- 'low' +fc$AdjMFC[fc$AdjMFC == '2'] <- 'high' +fc$AdjMFC = factor(fc$AdjMFC, levels = c('low', 'high')) + +# +set.seed(1992) +cu = c(col.alpha('dodgerblue',0.8),col.alpha('red', 0.8)) + + +p = ggplot(fc %>% filter(population == 'ID242') %>% filter(AdjMFC %in% c('low','high')), aes(x = AdjMFC, y = fold_change, fill = AdjMFC) )+ + theme_bw() + + geom_violin(show.legend = FALSE,trim = FALSE) + + geom_jitter(show.legend = FALSE, width = 0.3) + + ggpubr::stat_compare_means(method = 'wilcox',paired = FALSE) + + scale_fill_manual(values =cu) + + ylab('TFH day 7 vs baseline fold change') + +p2 = ggplot(fc %>% filter(population == 'ID244') %>% filter(AdjMFC %in% c('low','high')), aes(x = AdjMFC, y = fold_change, fill = AdjMFC) )+ + theme_bw() + + geom_violin(show.legend = FALSE,trim = FALSE) + + geom_jitter(show.legend = FALSE, width = 0.3) + + ggpubr::stat_compare_means(method = 'wilcox',paired = FALSE) + + scale_fill_manual(values =cu) + + ylab('PD1+ TFH day 7 vs baseline fold change') + +# combine +p3 = cowplot::plot_grid(p,p2, nrow = 1) +ggsave(p3,filename = paste0(figpath,'tfh.d7fc.pdf'), width = 7, height = 3) + +# what is the actual relative frquency of the cells +apply(bd[ ,2:5], 1, median) %>% median +# 0.001618353 # 0.1% across samples both timepoints + +# load adjuvant signatures +# d7fc vs nat adj sig +mono.validated = readRDS(file = here('mid_res/nat_adj/generated_data/V4/mono.as03.sig.validated.rds')) +dc.validated = readRDS(file = here('mid_res/nat_adj/generated_data/V4/dc.as03.sig.validated.rds')) +adjuvant.signatures = c(list('Mono_AS03_validated' = mono.validated), list('DC_AS03_validated' = dc.validated)) +# load average baseline high and low responders from unadjuvanted cohort +av0 = readRDS(file = here('mid_res/nat_adj/generated_data/V4/av0.rds')) +mono.sig.av2 = av0$CD14_Mono %>% + filter(gene %in% adjuvant.signatures$Mono_AS03_validated) %>% + group_by(sample, response) %>% + summarize(meansig = mean(count)) %>% + mutate(subject = str_sub(sample, 1,3)) + +# combine with tFH data +d4 = full_join(mono.sig.av2, fc, by = 'subject') +d4 = d4 %>% filter(response %in% c('Low', "High")) + +# tfh +p1 = ggplot(d4 %>% filter(population == 'ID242'), aes(x = fold_change , y = meansig)) + + theme_bw() + + geom_point() + + stat_smooth(method = 'lm', color = 'black', show.legend = FALSE) + + ggpubr::stat_cor(show.legend = FALSE) + + scale_color_manual(values = cu) + + xlab('TFH cell day 7 fold change') + + ylab('Baseline CD14 Monocyte \n Natural Adjuvant signature') + + ggtitle('high outlier included') +p1 +ggsave(p1,filename = paste0(figpath,'tfh.d7fc.NATADJ_sig_combined.pdf'),width = 3, height = 3) + +# outlier exc +p1 = ggplot(d4 %>% filter(population == 'ID242')%>% filter(!subject == '209'), aes(x = fold_change , y = meansig)) + + theme_bw() + + geom_point() + + stat_smooth(method = 'lm', color = 'black', show.legend = FALSE) + + ggpubr::stat_cor(show.legend = FALSE) + + scale_color_manual(values = cu) + + xlab('TFH cell day 7 fold change') + + ylab('Baseline CD14 Monocyte \n Natural Adjuvant signature') + + ggtitle('high outlier excluded') +p1 +ggsave(p1,filename = paste0(figpath,'tfh.d7fc.NATADJ_sig_combined_outlierexc.pdf'),width = 3, height = 3) + + + +# pd1 + +p1 = ggplot(d4 %>% filter(population == 'ID244'), aes(x = fold_change , y = meansig)) + + theme_bw() + + geom_point() + + stat_smooth(method = 'lm', color = 'black', show.legend = FALSE) + + ggpubr::stat_cor(show.legend = FALSE) + + scale_color_manual(values = cu) + + xlab('PD1+ TFH cell day 7 fold change') + + ylab('Baseline CD14 Monocyte \n Natural Adjuvant signature') + + ggtitle('high outlier included') +p1 +ggsave(p1,filename = paste0(figpath,'tfh.d7fc.NATADJ_sig_combined_PD1.pdf'),width = 3, height = 3) + +# tfh +p1 = ggplot(d4 %>% filter(population == 'ID242'), aes(x = fold_change , y = meansig, color = response)) + + theme_bw() + + geom_point() + + facet_wrap(~response) + + stat_smooth(method = 'lm', color = 'black', show.legend = FALSE) + + ggpubr::stat_cor(show.legend = FALSE) + + scale_color_manual(values = cu) + + xlab('TFH cell day 7 fold change') + + ylab('Baseline CD14 Monocyte \n Natural Adjuvant signature') + + ggtitle('high responder outlier included') +p1 +ggsave(p1,filename = paste0(figpath,'tfh.d7fc.NATADJ_sig_thfmain.pdf'),width = 7, height = 4) + + +p1 = ggplot(d4 %>% filter(population == 'ID242'), aes(x = fold_change , y = meansig, color = response)) + + theme_bw() + + geom_point() + + stat_smooth(method = 'lm', color = 'black', show.legend = FALSE) + + ggpubr::stat_cor(aes(x = fold_change , y = meansig), inherit.aes = FALSE, show.legend = FALSE) + + scale_color_manual(values = cu) + + xlab('TFH cell day 7 fold change') + + ylab('Baseline CD14 Monocyte \n Natural Adjuvant signature') +p1 +ggsave(p1,filename = paste0(figpath,'tfh.d7fc.NATADJ_sig_thfmain.pdf'), width = 5, height = 4) + +## take away the high value outlier and view trend +p1 = ggplot(d4 %>% filter(population == 'ID242') %>% filter(!subject == '209'), aes(x = fold_change , y = meansig, color = response)) + + theme_bw() + + geom_point() + + #facet_wrap(~response) + + stat_smooth(method = 'lm', color = 'black', show.legend = FALSE) + + ggpubr::stat_cor(show.legend = FALSE) + + scale_color_manual(values = cu) + + xlab('TFH cell day 7 fold change') + + ylab('Baseline CD14 Monocyte \n Natural Adjuvant signature') + + ggtitle('high responder outlier excluded') +p1 +ggsave(p1,filename = paste0(figpath,'tfh.d7fc.NATADJ_sig_thfmain_outlierRM.pdf'), width = 7, height = 4) + + +mdc= av0$mDC %>% + filter(gene %in% adjuvant.signatures$DC_AS03_validated) %>% + group_by(sample, response) %>% + summarize(meansig = mean(count)) %>% + mutate(subject = str_sub(sample, 1,3)) + +# combine with tFH data +d4 = full_join(mdc, fc, by = 'subject') +d4 = d4 %>% filter(response %in% c('Low', "High")) + +# tfh +p1 = ggplot(d4 %>% filter(population == 'ID242'), aes(x = fold_change , y = meansig)) + + theme_bw() + + geom_point() + + stat_smooth(method = 'lm', color = 'black', show.legend = FALSE) + + ggpubr::stat_cor(show.legend = FALSE) + + scale_color_manual(values = cu) + + xlab('TFH cell day 7 fold change') + + ylab('Baseline CD14 mDC \n Natural Adjuvant signature') + + ggtitle('high outlier included') +p1 +ggsave(p1,filename = paste0(figpath,'tfh.d7fc.NATADJ_sig_MDC_combined.pdf'),width = 3, height = 3) + +#################################### + diff --git a/mid_res/rev/tfh/figures/tfh.baseline.pdf b/mid_res/rev/tfh/figures/tfh.baseline.pdf new file mode 100644 index 0000000..c082027 Binary files /dev/null and b/mid_res/rev/tfh/figures/tfh.baseline.pdf differ diff --git a/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_MDC_combined.pdf b/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_MDC_combined.pdf new file mode 100644 index 0000000..b30f9e3 Binary files /dev/null and b/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_MDC_combined.pdf differ diff --git a/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_combined.pdf b/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_combined.pdf new file mode 100644 index 0000000..013b530 Binary files /dev/null and b/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_combined.pdf differ diff --git a/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_combined_PD1.pdf b/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_combined_PD1.pdf new file mode 100644 index 0000000..b5e589b Binary files /dev/null and b/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_combined_PD1.pdf differ diff --git a/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_combined_outlierexc.pdf b/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_combined_outlierexc.pdf new file mode 100644 index 0000000..6ee5bbe Binary files /dev/null and b/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_combined_outlierexc.pdf differ diff --git a/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_thfmain.pdf b/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_thfmain.pdf new file mode 100644 index 0000000..9d53637 Binary files /dev/null and b/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_thfmain.pdf differ diff --git a/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_thfmain_outlierRM.pdf b/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_thfmain_outlierRM.pdf new file mode 100644 index 0000000..ee1c4bb Binary files /dev/null and b/mid_res/rev/tfh/figures_7/tfh.d7fc.NATADJ_sig_thfmain_outlierRM.pdf differ diff --git a/mid_res/rev/tfh/figures_7/tfh.d7fc.pdf b/mid_res/rev/tfh/figures_7/tfh.d7fc.pdf new file mode 100644 index 0000000..9c1e31c Binary files /dev/null and b/mid_res/rev/tfh/figures_7/tfh.d7fc.pdf differ diff --git a/mid_res/rev/tfh/generated_data/tfh.baseline.pdf b/mid_res/rev/tfh/generated_data/tfh.baseline.pdf new file mode 100644 index 0000000..41c4cc9 Binary files /dev/null and b/mid_res/rev/tfh/generated_data/tfh.baseline.pdf differ diff --git a/mid_res/rev/tfh/parse.R b/mid_res/rev/tfh/parse.R new file mode 100644 index 0000000..6a3436d --- /dev/null +++ b/mid_res/rev/tfh/parse.R @@ -0,0 +1,194 @@ +# scripts provided by Mani + +# this fn is somewhat obsolete, use v1v2 fn below instead. +parse.M1 <- function(day) +{ + a = read.delim("M1.exported.txt", stringsAsFactors=F) + stopifnot(grepl(".*M1 H1N1-", a$Sample)) + a = a[!grepl("CHI-007",a$Sample),] + + a$subject = do.call(rbind, strsplit(a$Sample, "-"))[,2] + a$day = gsub("day ", "day", do.call(rbind, strsplit(a$Sample, "-"))[,3]) + print(table(a$day)) + a = a[a$day==day,] + print(table(a$day)) + + print(colnames(a)) + a$ID315 = a$Monocytes.Single.cells.Alive.CD14..CD16..class..Freq..of.Parent / a$Monocytes.Single.cells.Alive.Count + a$ID300 = a$Monocytes.Single.cells.Alive.CD14..CD16..interm.Freq..of.Parent / a$Monocytes.Single.cells.Alive.Count + a$ID334 = a$Monocytes.Single.cells.Alive.CD14.CD16...non.class..Freq..of.Parent / a$Monocytes.Single.cells.Alive.Count + + b = as.matrix(a[,c('ID315','ID300','ID334')]) + rownames(b) = paste0("X",a$subject) + stopifnot(b >= 0, b <= 1) + b = t(b) + #write.table(file='tmp.M1.day0.txt', b, quote=F, sep="\t") + b +} + +# ID299 is the CD45+ subset of "Monocytes/Single.cells/Alive" cell population. The three monocyte subsets are expressed as fraction of the former ID299 population in this fn, instead of the latter parent population as in the above fn. Using ID299 is more consistent with the description in the virtual flow manuscript of how all CHI cell populations are expressed as fraction of "higher-level CD45+ leukocyte or lymphocyte" gated populations. +parse.M1.v1v2 <- function(day, debugcheck=(day=='day0'), controls=F, counts=F) +{ + v1 = read.delim("M1.exported.txt", stringsAsFactors=F) #contains ID3xxx subsets (Nr. of the fraction) + v2 = read.delim("M1.v2.exported.txt", stringsAsFactors=F) #contains ID299 (Dr. of the fraction) + stopifnot(v1$Sample == v2$Sample) + a = v1 + a$ID299 = v2$ID299 + stopifnot(grepl(".*M1 H1N1-", a$Sample)) + if (!controls) { a = a[!grepl("CHI-007",a$Sample),]; } else { a$Sample = gsub("CHI-007","CHI007",a$Sample); } + + if (debugcheck) #check if common cols. are same, and how different ID299 vs. its parents are. + { + v1 = v1[,intersect(colnames(v1),colnames(v2))] + v2 = v2[,intersect(colnames(v1),colnames(v2))] + print(all.equal(v1,v2)) + source("~/codes/Rfns/helptools.R") + ridx = arrinds(v1 != v2)[,1] + stopifnot(v1[-ridx,]==v2[-ridx,], v1[ridx,1:2]==v2[ridx,1:2]) + print(cbind(v1[ridx,],v2[ridx,-(1:2)])) #excluding two bridge sample measurements, only sample 215.day0 has some minor discrepancies between v1 and v2 + print(summary(a$ID299 / a$Monocytes.Single.cells.Alive.Count)) #bummer: these two cell counts are not that different, so results should not change much! + } + + a$subject = do.call(rbind, strsplit(a$Sample, "-"))[,2] + a$day = gsub("day ", "day", do.call(rbind, strsplit(a$Sample, "-"))[,3]) + print(table(a$day)) + if (!controls) { a = a[a$day==day,]; } else { a = a[a$subject=="CHI007",]; } + print(table(a$day)) + print(table(a$subject)) + + print(colnames(a)) + if (counts) { a$ID299=1; } + a$ID315 = a$Monocytes.Single.cells.Alive.CD14..CD16..class..Freq..of.Parent / a$ID299 + a$ID300 = a$Monocytes.Single.cells.Alive.CD14..CD16..interm.Freq..of.Parent / a$ID299 + a$ID334 = a$Monocytes.Single.cells.Alive.CD14.CD16...non.class..Freq..of.Parent / a$ID299 + + b = as.matrix(a[,c('ID315','ID300','ID334')]) + rownames(b) = paste0("X",a$subject,ifelse(grepl("^day",a$day),"",paste0(".",a$day))) + stopifnot(b >= 0, all(b <= 1) || counts) + b = t(b) + #write.table(file='tmp.M1.day0.txt', b, quote=F, sep="\t") + b +} + +parse.T4 <- function(day, controls=F, counts=F) +{ + hdr = read.delim("T4.exported.txt", stringsAsFactors=F, header=F, nrows=1) + a = read.delim("T4.exported.txt", stringsAsFactors=F) + stopifnot(grepl(".*T4 H1N1-", a$Sample), length(hdr)==ncol(a)) + if (!controls) { a = a[!grepl("CHI-007",a$Sample),]; } else { a$Sample = gsub("CHI-007","CHI007",a$Sample); } + + # get the Tfh1 and Tfh1/Tfh17 cell popns, along with their CD278+PD1+ subsets. + cidx = c(IDhlg = which(hdr == "Lymphocyte/single cells/Alive cells,Count"), #higher-level gate in the hierarchy + ID242 = grep("Tfh1,Count$",hdr), + ID244 = grep("Tfh1\\/.*CD278\\+.*PD\\-1\\+,Count$",hdr), + ID247 = grep("Tfh1 Tfh17,Count$",hdr), + ID249 = grep("Tfh1 Tfh17\\/.*CD278\\+.*PD\\-1\\+,Count$",hdr)) + stopifnot(length(cidx)==5) + print(cbind(names(cidx), t(hdr[cidx]))) + stopifnot(make.names(hdr)[cidx] == colnames(a)[cidx]) + + a$subject = do.call(rbind, strsplit(a$Sample, "-"))[,2] + a$day = gsub("day ", "day", do.call(rbind, strsplit(a$Sample, "-"))[,3]) + print(table(a$day)) + if (!controls) { a = a[a$day==day,]; } else { a = a[a$subject=="CHI007",]; } + print(table(a$day)) + print(table(a$subject)) + + if (counts) { a[,cidx['IDhlg']]=1; } + stopifnot(names(cidx)[1]=="IDhlg") + b = matrix(NA, nrow(a), length(cidx)-1, dimnames=list(paste0("X",a$subject,ifelse(grepl("^day",a$day),"",paste0(".",a$day))), names(cidx)[-1])) + for (i in names(cidx)[-1]) + { + b[,i] = a[,cidx[i]]/a[,cidx['IDhlg']] + } + stopifnot(b >= 0, all(b <= 1) || counts) + b = t(b) + #write.table(file='tmp.T4.day0.txt', b, quote=F, sep="\t") + b +} + +# sapply(c('day0','day1','day7'), function(x) { parse.M1T4(x); }) +parse.M1T4 <- function(day="day0") +{ + M1 = parse.M1.v1v2(day) + T4 = parse.T4(day) + print(dim(M1)) + print(dim(T4)) + stopifnot(colnames(M1)==colnames(T4), nrow(M1)==3, nrow(T4)==4, !(rownames(M1) %in% rownames(T4)), !duplicated(colnames(M1))) + write.table(file=paste0('tmp.M1T4.',gsub(" ","",day),'.txt'), rbind(M1,T4), quote=F, sep="\t") +} + +parse.M1T4.controls <- function() +{ + dummy.day = 'day0' + M1 = parse.M1.v1v2(dummy.day, controls=T) + T4 = parse.T4(dummy.day, controls=T) + print(dim(M1)) + print(dim(T4)) + stopifnot(colnames(M1)==colnames(T4), nrow(M1)==3, nrow(T4)==4, !(rownames(M1) %in% rownames(T4)), !duplicated(colnames(M1))) + rbind(M1,T4) +} + +parse.M1T4.counts <- function(day='day0') +{ + M1 = parse.M1.v1v2(day, counts=T) + T4 = parse.T4(day, counts=T) + print(dim(M1)) + print(dim(T4)) + stopifnot(colnames(M1)==colnames(T4), nrow(M1)==3, nrow(T4)==4, !(rownames(M1) %in% rownames(T4)), !duplicated(colnames(M1))) + rbind(M1,T4) +} + +# day7-day0 data +day7minusday0 <- function() +{ + day0 = read.delim('M1T4.day0.txt', stringsAsFactors=F) + day7 = read.delim('M1T4.day7.txt', stringsAsFactors=F) + stopifnot(identical(dimnames(day0), dimnames(day7))) + write.table(file='M1T4.day7minusday0.txt', day7-day0, sep="\t", quote=F) +} + +# alternative gating based on different markers (CD64 HLA-DR instead of CD14 CD16) for defining monocyte subsets such as non-classical monocytes. +alternative.gating.checks <- function() +{ + a = read.delim("newpops/M1.v2.exported.txt", stringsAsFactors=F) + a = a[grepl("day 0",a$Sample),] + a$subject = do.call(rbind, strsplit(a$Sample, "-"))[,2] + a$subject = make.names(a$subject) + stopifnot(!duplicated(a$subject)) + a$ID349 = a$ID349/a$ID299 + a$ID353 = a$ID353/a$ID299 + a$ID357 = a$ID357/a$ID299 + b = a[,c('ID349','ID353','ID357')] + rownames(b) = a$subject + write.table(t(b), 'newpops/M1altgate.day0.txt', quote=F, sep="\t") + colnames(b) = paste('obs',colnames(b),'day0',sep='.') + + source("~/codes/Rfns/helptools.R") + a = read.delim('newpops/obspreddata.txt', row.names=1) + a = a[,grepl("(ID315|ID300|ID334).day0",colnames(a))] + d = rbind.fill.rownames(as.data.frame(t(a)), as.data.frame(t(b))) + d = as.data.frame(t(d)) + + if (is.null(d$pred.ID315.day0)) + { + stopifnot(is.null(d$pred.ID300.day0), !is.null(d$pred.ID334.day0)) + d$pred.ID315.day0 = d$pred.ID300.day0 = NA + } + + library(ggplot2) + library(gridExtra) + pdf(file='newpops/M1altgate.checks.pdf') + print(ggplot(d, aes(obs.ID315.day0, obs.ID353.day0)) + geom_point()) + print(ggplot(d, aes(obs.ID300.day0, obs.ID349.day0)) + geom_point()) + pvstr = sprintf('(P=%.2g)', cor.test(d$obs.ID334.day0, d$obs.ID357.day0, method='spearman')$p.value); pvstr="" + print(gp1 <- ggplot(d, aes(obs.ID334.day0, obs.ID357.day0)) + geom_point() + labs(title=pvstr)) + print(ggplot(d, aes(pred.ID315.day0, obs.ID353.day0)) + geom_point()) + print(ggplot(d, aes(pred.ID300.day0, obs.ID349.day0)) + geom_point()) + pvstr = sprintf('(P=%.2g)', cor.test(d$pred.ID334.day0, d$obs.ID357.day0, method='spearman')$p.value); pvstr="" + print(gp2 <- ggplot(d, aes(pred.ID334.day0, obs.ID357.day0)) + geom_point() + labs(title=pvstr)) + grid.arrange(gp1, gp2, nrow=2, ncol=2) + dev.off() + +} +