From 18b7120d843636590831c0ba93a7c2a49baf7308 Mon Sep 17 00:00:00 2001 From: wlu04 Date: Wed, 4 May 2022 14:16:50 -0400 Subject: [PATCH] create PDFs --- R/constants.R | 57 ++++++++++++-------- R/figure1.R | 24 ++++++--- R/figure2.R | 143 +++++++++++++++++++++++++++++++++++++++++--------- R/figure3.R | 45 ++++++++++++---- R/figure4.R | 14 +++-- R/figureS11.R | 10 +++- R/figureS12.R | 12 ++++- R/figureS13.R | 13 ++++- R/figureS14.R | 11 +++- R/figureS15.R | 10 +++- R/figureS16.R | 20 ++++--- R/figureS17.R | 13 +++-- R/figureS18.R | 10 +++- R/figureS19.R | 23 ++++++-- R/figureS20.R | 14 +++-- R/figureS8.R | 10 +++- R/figureS9.R | 23 ++++++-- 17 files changed, 346 insertions(+), 106 deletions(-) diff --git a/R/constants.R b/R/constants.R index 03d748e..8b02059 100644 --- a/R/constants.R +++ b/R/constants.R @@ -18,9 +18,10 @@ source('~/ukbb_pan_ancestry/constants.R') tranche = '500k' test = 'skato' -path_to = '~/Desktop/ukb_exomes_450k/' -output = '~/Desktop/ukb_exomes_450k/' -output_path = '~/Desktop/ukb_exomes_450k/' +path_to = '~/Dropbox (Partners HealthCare)/analysis/ukb_exomes_450k/' +output = '~/Dropbox (Partners HealthCare)/analysis/ukb_exomes_450k/' +output_path = '~/Dropbox (Partners HealthCare)/analysis/ukb_exomes_450k/' +in2mm = 25.4 @@ -44,7 +45,8 @@ caf_names = c(paste0('CAF:[0 , 0.0001]'), 'CAF:(0.0001, 0.001]', 'CAF:(0.001, 0. ac_types = c('(0, 5]', '(5, 50]', '(50, 500]', '(500, 5000]', '(5000, 50000]', '(50000, )') ac_names = c('(0, 5]', '(5, 50]', '(50, 500]', '(500, 5000]', '(5000, 50000]', paste0('(50000, ', bquote("\U221E"), ' )')) gene_list_names = c('Constrained' = 'Constrained', 'Developmental Delay' = 'Developmental\nDelay', 'all_ad' = 'Autosomal dominant', 'all_ar' = 'Autosomal recessive', - 'fda_approved_drug_targets' = 'FDA approved\ndrug targets', 'gwascatalog' = 'GWAS catalog', 'CEGv2_subset_universe' = 'Cell essential', 'NEGv1_subset_universe' = 'Cell non-essential') + 'fda_approved_drug_targets' = 'FDA approved\ndrug targets', 'gwascatalog' = 'GWAS catalog') + #, 'CEGv2_subset_universe' = 'Cell essential', 'NEGv1_subset_universe' = 'Cell non-essential') icd_names = c('A' = 'Infectious', 'B' = 'Infectious','C' = 'Neoplasms', 'D' = 'Blood/immune', 'E' = 'Endocrine/metabolic', 'F' = 'Mental/behavioral', 'G' = 'Nervous', 'H1' = 'Eye', 'H2' = 'Ear', 'I' = 'Circulatory', 'J' = 'Respiratory', 'K' = 'Digestive', 'L' = 'Skin/subcutaneous', 'M' = 'Musculoskeletal', 'N' = 'Genitourinary', 'O' = 'Pregnancy', 'P' = 'Perinatal', 'Q' = 'Congenital', 'R' = 'Symptoms', 'S' = 'Injury/poison', 'T' = 'Injury/poison', 'V' = 'External causes', 'Y' = 'External causes', 'Z' = 'Health Factors') @@ -246,7 +248,7 @@ save_group_matched_figure2 = function(matched_summary, levels, labels, group_col geom_pointrange(stat = "identity", position = position_dodge(width = 2)) + labs(y = 'Proportion', x = x_lab) + scale_color_manual(name = x_lab, values = c("#D95F02", "#7570B3", "#1B9E77")) + - scale_y_continuous(label = label_percent(accuracy = 1)) + + scale_y_continuous(label = label_percent(accuracy = 1), limits = c(0, 1.05)) + scale_x_discrete(labels = levels) + theme_classic() + themes + facet_grid(~interval) + @@ -561,20 +563,21 @@ save_icd_manhattan_figure = function(data, p_filter = 1e-2, width = 10, spacing icd_mh_plt = data %>% filter(min_p < p_filter) %>% ggplot + aes(x = pos, y = -log10(min_p), color = icd10) + - geom_point(size = 0.75) + + geom_point_rast(size = 0.75) + geom_hline(yintercept = -log10(sig_level), lty = 2)+ scale_x_continuous(breaks = ctr, labels = icd_names[icd_labels]) + scale_color_manual(values = icd_colors) + - scale_y_continuous(trans = gwas_loglog_trans(), breaks = loglog_breaks, name = expression(bold(paste('-log'[10], '(', italic(p), ')'))))+ + # scale_y_continuous(trans = gwas_loglog_trans(), breaks = loglog_breaks, name = expression(bold(paste('-log'[10], '(', italic(p), ')'))))+ + scale_y_continuous(trans = gwas_loglog_trans(), breaks = c(seq(2, 10, 2), 20, 50, 100, 200, 400), name = expression(bold(paste('-log'[10], '(', italic(p), ')'))))+ labs(x = NULL, y = expression(bold(paste('-log'[10], '(', italic(p), ')')))) + theme_classic() + themes + theme(legend.position = 'none', - axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95, size = 12), - axis.text.y = element_text(size = 12), + axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95, size = 7), + axis.text.y = element_text(size = 7), plot.margin = margin(0.8, 0.1, 0.1, 0.1, "cm"), - axis.title = element_text(size = 14), - legend.title = element_text(size = 12, face = 'bold'), - legend.text = element_text(size = 12), ) + axis.title = element_text(size = 8), + legend.title = element_text(size = 8, face = 'bold'), + legend.text = element_text(size = 8), ) if(save_plot){ png(output_path, height = 4, width = 6, units = 'in', res = 300) print(icd_mh_plt) @@ -619,9 +622,14 @@ save_prop_by_annt_freq_figure_500k = function(matched_summary, output_path, save theme(panel.spacing = unit(0, "lines"), strip.background = element_blank(), strip.placement = "outside", - strip.text = element_text(face = 'bold'), - axis.text= element_text(size = 10), - axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95) ) + strip.text = element_text(size = 8, face = 'bold'), + axis.title = element_text(size = 9), + axis.text = element_text(size = 7), + axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95), + legend.key.size = unit(0.5, 'cm'), + # axis.text.x = element_text(angle = 30, vjust = 1, hjust = 0.95), + legend.title = element_text(size = 8, face = 'bold'), + legend.text = element_text(size = 7)) if(save_plot){ png(output_path, height = 3, width = 5, units = 'in', res = 300) print(plt) @@ -639,11 +647,12 @@ save_count_barplot_figure = function(cnt_data, cnt_type, save_plot = F, output_p scale_alpha_discrete(range = c(0.5, 1)) + labs(y = paste('Number of', cnt_type), x = NULL, alpha = NULL) + annotation_color_scale + annotation_fill_scale + themes + - geom_text(aes(label = cnt), vjust = -0.3, size = 3, position = position_dodge(width = 1), color = '#187bcd') + + geom_text(aes(label = cnt), vjust = -0.1, size = 2, position = position_dodge(width = 1), color = '#187bcd') + theme_classic() + themes + theme(legend.position = 'none')+ - theme(plot.margin = margin(0.5, 0.1, 0.1, 0.1, "cm"), - axis.title = element_text(size = 10), - axis.text = element_text(size = 10), + theme(plot.margin = margin(0.5, 0.1, 0.1, 0.1, "cm"), + axis.title = element_text(size = 8), + axis.text = element_text(size = 7), + #axis.text.x = element_text(angle = 30, vjust = 1, hjust = 0.95), legend.title = element_text(size = 8, face = 'bold'), legend.text = element_text(size = 8), ) if(save_plot){ @@ -666,13 +675,15 @@ save_count_by_freq_figure = function(cnt_data, type, save_plot = F, output_path) scale_y_continuous(label = comma) + labs(y = paste('Number of', type), x = paste(freq, 'Interval'))+ annotation_color_scale + annotation_fill_scale + themes + - geom_text(aes(label = cnt, color = annotation), vjust = -0.3, size = 2, position = position_dodge(width = 1)) + + geom_text(aes(label = cnt, color = annotation), vjust = -0.1, size = 2, position = position_dodge(width = 1)) + theme_classic() + themes + theme(plot.margin = margin(0.5, 0.1, 0.1, 0.1, "cm"), - axis.title = element_text(size = 10), - axis.text.x = element_text(angle = 30, vjust = 1, hjust = 0.95), + axis.title = element_text(size = 8), + axis.text = element_text(size = 7), + legend.key.size = unit(0.5, 'cm'), + # axis.text.x = element_text(angle = 30, vjust = 1, hjust = 0.95), legend.title = element_text(size = 8, face = 'bold'), - legend.text = element_text(size = 8)) + legend.text = element_text(size = 7)) if(save_plot){ png(output_path, height = 4, width = 6, units = 'in', res = 300) print(plt) diff --git a/R/figure1.R b/R/figure1.R index 2011ed9..a70adcb 100644 --- a/R/figure1.R +++ b/R/figure1.R @@ -1,11 +1,13 @@ source('~/ukb_exomes/R/constants.R') detach(package:plyr) +#install.packages("Cairo") +library(Cairo) gene = load_ukb_file(paste0('gene_qc_metrics_ukb_exomes_', tranche,'.txt.bgz'), subfolder = 'qc/') var = load_ukb_file(paste0('variant_qc_metrics_ukb_exomes_', tranche,'.txt.bgz'), subfolder = 'qc/') pheno = load_ukb_file(paste0('pheno_qc_metrics_ukb_exomes_', tranche,'.txt.bgz'), subfolder = 'qc/') -figure1 = function(save_plot = F, output_path){ +figure1 = function(save_plot = F, save_pdf = F,output_path){ figure1_cnt = data.frame( type = rep(c('Variants', 'Groups', 'Phenotypes'), 2), cnt = as.integer(c(nrow(var), nrow(gene), nrow(pheno), @@ -15,9 +17,9 @@ figure1 = function(save_plot = F, output_path){ filter = rep(c('Before Filtering', 'After Filtering'), each = 3) ) %>% mutate(filter = factor(filter, levels = c('Before Filtering', 'After Filtering'))) - figure1a = save_count_barplot_figure(cnt_data = figure1_cnt, cnt_type = 'Phenotypes', save_plot = T, output_path = paste0(output, 'figure1a_pheno_cnt.png')) - figure1b = save_count_barplot_figure(cnt_data = figure1_cnt, cnt_type = 'Variants', save_plot = T, output_path = paste0(output, 'figure1b_var_cnt.png')) - figure1c = save_count_barplot_figure(cnt_data = figure1_cnt, cnt_type = 'Groups', save_plot = T, output_path = paste0(output, 'figure1c_gene_cnt.png')) + figure1a = save_count_barplot_figure(cnt_data = figure1_cnt, cnt_type = 'Phenotypes', save_plot = save_plot, output_path = paste0(output, 'figure1a_pheno_cnt.png')) + figure1b = save_count_barplot_figure(cnt_data = figure1_cnt, cnt_type = 'Variants', save_plot = save_plot, output_path = paste0(output, 'figure1b_var_cnt.png')) + figure1c = save_count_barplot_figure(cnt_data = figure1_cnt, cnt_type = 'Groups', save_plot = save_plot, output_path = paste0(output, 'figure1c_gene_cnt.png')) gene_cnt_by_freq = gene %>% filter(keep_gene_expected_ac & keep_gene_coverage & keep_gene_n_var & get(paste0('keep_gene_',test))) %>% @@ -27,8 +29,8 @@ figure1 = function(save_plot = F, output_path){ mutate(annotation = if_else(annotation %in% c('missense', 'LC'), 'missense|LC', annotation)) %>% format_count_by_freq_data(freq_col = 'AF') - figure1d = save_count_by_freq_figure(cnt_data = var_cnt_by_freq, type = 'Variants', save_plot = T, output_path = paste0(output, 'figure1d_var_cnt_by_AF.png')) - figure1e = save_count_by_freq_figure(cnt_data = gene_cnt_by_freq, type = 'Groups', save_plot = T, output_path = paste0(output, 'figure1e_gene_cnt_by_CAF.png')) + figure1d = save_count_by_freq_figure(cnt_data = var_cnt_by_freq, type = 'Variants', save_plot = save_plot, output_path = paste0(output, 'figure1d_var_cnt_by_AF.png')) + figure1e = save_count_by_freq_figure(cnt_data = gene_cnt_by_freq, type = 'Groups', save_plot = save_plot, output_path = paste0(output, 'figure1e_gene_cnt_by_CAF.png')) figure1_top = egg::ggarrange(figure1a, figure1b, figure1c, labels = c('(A) Phenotypes', '(B) Variants', '(C) Groups'), ncol = 3, label.args = list(gp = gpar(font = 2, cex = 0.75), vjust = 1.5, hjust = 0)) @@ -40,7 +42,15 @@ figure1 = function(save_plot = F, output_path){ print(figure) dev.off() } + if(save_pdf){ + ggsave(filename=output_path, plot=figure, device=cairo_pdf, + width = 174/in2mm, height = 130/in2mm, units = "in", dpi = 300) + # cairo_pdf(output_path, height = 130/in2mm, width = 174/in2mm, family="NimbusSan") + # print(figure) + # dev.off() + } return(figure) } -figure1(save_plot = T, output_path = paste0(output, 'figure1_', test, '.png')) \ No newline at end of file +figure1(save_pdf = T, output_path = paste0(output, 'final_pdf_figures/figure1_', test, '.pdf')) +# figure1(save_plot = T, output_path = paste0(output, 'figure1_', test, '.png')) \ No newline at end of file diff --git a/R/figure2.R b/R/figure2.R index cad6cf3..f079103 100644 --- a/R/figure2.R +++ b/R/figure2.R @@ -3,6 +3,7 @@ detach(package:plyr) icd_var_after = load_ukb_file(paste0('icd_min_p_var_filtered_',test,'_', tranche,'.txt.bgz'), subfolder = 'analysis/') icd_gene_after = load_ukb_file(paste0('icd_min_p_gene_filtered_', test, '_', tranche, '.txt.bgz'), subfolder = 'analysis/') +save_plot = F # figure 2 (A) figure2a = function(save_plot = F, output_path){ @@ -37,7 +38,7 @@ figure2b = function(save_plot = F, output_path){ } # figure 2 (C/D) -figure2cd = function(type = 'pheno', save_plot = F, output_path){ +figure2cd1 = function(type = 'pheno', save_plot = F, output_path){ var_gene = load_ukb_file(paste0('var_gene_comparison_by_', type,'_filtered_', test, '_3annt_', tranche,'.txt.bgz'), subfolder = 'analysis/') if(type == 'pheno'){ set = c('categorical', 'continuous', 'icd10') @@ -64,10 +65,10 @@ figure2cd = function(type = 'pheno', save_plot = F, output_path){ color_scale = annotation_color_scale axis_scale = scale_x_discrete(labels = annotation_names) } - figure2_p1 = var_gene_summary %>% + figure = var_gene_summary %>% ggplot + aes(x = category, y = value) + geom_bar(stat="identity", aes(color = category), fill='grey80', width = 0.5) + - labs(y = 'Significant Association', x = '') + + labs(y = 'Significant \n Association', x = '') + geom_bar(stat = "identity", aes(alpha = label), width = 0.5) + scale_alpha_manual(values = c(0.6, 1, 0.1), name = '', labels = c( 'Significant by variant', 'Significant by both', 'Significant by gene'), guide = guide_legend(override.aes = list(fill = 'grey30') )) + color_scale + axis_scale + @@ -77,33 +78,76 @@ figure2cd = function(type = 'pheno', save_plot = F, output_path){ strip.placement = "top", strip.text = element_text(face = 'bold'), plot.margin = margin(0.5, 0.1, 0.1, 0.1, "cm"), - axis.text.x = element_text(size = 13, face = 'bold'), - axis.text.y = element_text(size = 12), - axis.title.y = element_text(size = 13, face = 'bold'), - legend.title = element_text(size = 11, face = 'bold'), - legend.text = element_text(size = 11), ) + axis.text.x = element_text(size = 6, face = 'bold'), + axis.text.y = element_text(size = 7), + axis.title.y = element_text(size = 7, face = 'bold'), + legend.margin = margin(0.1, 0.1, 0.1, 0.1, "cm"), + legend.box.margin = margin(0.15, 0.1, 0.15, 0.1, "cm"), + legend.spacing = unit(0.1, "mm"), + legend.box = 'vertical', + legend.key.size = unit(0.3, 'cm'), + legend.title = element_text(size = 7, face = 'bold'), + legend.text = element_text(size = 6.5), ) - figure2_p2 = var_gene_summary %>% + if(save_plot){ + png(output_path, height = 4, width = 6, units = 'in', res = 300) + print(figure) + dev.off() + } + return(figure) +} + + +figure2cd2 = function(type = 'pheno', save_plot = F, output_path){ + var_gene = load_ukb_file(paste0('var_gene_comparison_by_', type,'_filtered_', test, '_3annt_', tranche,'.txt.bgz'), subfolder = 'analysis/') + if(type == 'pheno'){ + set = c('categorical', 'continuous', 'icd10') + group_type = trait_types + }else{ + set = c('missense|LC', 'pLoF', 'synonymous') + group_type = annotation_types + } + var_gene_summary = rbind(get_var_gene_overlap_count(data = var_gene, type = type, group = 'all', normalize = T, print = F), + get_var_gene_overlap_count(data = var_gene, type = type, group = set[1], normalize = T, print = F), + get_var_gene_overlap_count(data = var_gene, type = type, group = set[2], normalize = T, print = F), + get_var_gene_overlap_count(data = var_gene, type = type, group = set[3], normalize = T, print = F)) %>% + filter(label != 'None' & category != 'all') %>% + mutate(label = factor(label, levels = c('Significant by variant', 'Significant by both', 'Significant by gene')), + category = factor(category, levels = group_type)) %>% + group_by(category) %>% + arrange(desc(label)) %>% + mutate(value2 = value/sum(value)) %>% + mutate(pos = cumsum(value2) - 0.5*value2) + if(type == 'pheno'){ + color_scale = trait_color_scale + axis_scale = scale_x_discrete(labels = trait_type_names) + }else{ + color_scale = annotation_color_scale + axis_scale = scale_x_discrete(labels = annotation_names) + } + + figure = var_gene_summary %>% ggplot + aes(x = category, y = value2, label = round(value, 2)) + geom_bar(stat="identity", aes(color = category), fill='grey80', width = 0.5) + - labs(y = 'Significant Association', x = '') + + labs(y = 'Significant \n Association', x = '') + geom_bar(stat = "identity", aes(alpha = label), width = 0.5) + scale_alpha_manual(values = c(0.6, 1, 0.1), name = NULL, labels = c( 'Significant by variant', 'Significant by both', 'Significant by gene'), guide = guide_legend(override.aes = list(fill = 'grey30') )) + color_scale + axis_scale + theme_classic() + themes + - geom_text(aes(x = category, y = round(pos, 2)), size = 3.8, color = 'black') + + geom_text(aes(x = category, y = round(pos, 2)), size = 2, color = 'black') + theme(panel.spacing = unit(1, "lines"), strip.background = element_blank(), strip.placement = "top", strip.text = element_text(face = 'bold'), plot.margin = margin(0.5, 0.1, 0.1, 0.1, "cm"), - axis.text.x = element_text(size = 13, face = 'bold'), - axis.text.y = element_text(size = 12), - axis.title.y = element_text(size = 13, face = 'bold'), - legend.title = element_text(size = 11, face = 'bold'), - legend.text = element_text(size = 11), ) - figure = ggpubr::ggarrange(figure2_p1, figure2_p2, ncol = 2, labels = c(paste0('Associations per ', if_else(type=='pheno', 'phenotype', 'group')), 'Normalized'), - common.legend = TRUE, vjust = 1, hjust = 0, font.label = list(size = 12, color = "black", face = "bold", family = NULL)) + axis.text.x = element_text(size = 6, face = 'bold'), + axis.text.y = element_text(size = 7), + axis.title.x = element_text(size = 6, face = 'bold'), + axis.title.y = element_text(size = 7, face = 'bold'), + # legend.key.size = unit(0.2, 'cm'), + # legend.title = element_text(size = 6, face = 'bold'), + # legend.text = element_text(size = 5), + ) if(save_plot){ png(output_path, height = 4, width = 6, units = 'in', res = 300) print(figure) @@ -112,27 +156,74 @@ figure2cd = function(type = 'pheno', save_plot = F, output_path){ return(figure) } +# figure 2 (C) +figure2c = function(save_plot = F, pdf = T, output_path){ + figure2c1 = figure2cd1(type = 'pheno', save_plot = save_plot, output_path = paste0(output, 'figure2c1_var_gene_comparison_pheno.png')) + figure2c2 = figure2cd2(type = 'pheno', save_plot = save_plot, output_path = paste0(output, 'figure2c2_var_gene_comparison_pheno.png')) + figure = ggpubr::ggarrange(figure2c1, figure2c2, ncol = 2, + labels = c('Associations per phenotype', 'Normalized'), + common.legend = TRUE, + vjust = 1, hjust = 0, font.label = list(size = 8, color = "black", face = "bold", family = NULL) + ) + if(save_plot){ + if(pdf){ + pdf(output_path, height = 57/in2mm, width = 114/in2mm) + }else{ + png(output_path, height = 57/in2mm, width = 114/in2mm, units = 'in', res = 300) + } + print(figure) + dev.off() + } + return(figure) +} +# figure 2 (D) +figure2d = function(save_plot = F, pdf = T, output_path){ + figure2d1 = figure2cd1(type = 'gene', save_plot = save_plot, output_path = paste0(output, 'figure2d1_var_gene_comparison_annotation.png')) + figure2d2 = figure2cd2(type = 'gene', save_plot = save_plot, output_path = paste0(output, 'figure2d2_var_gene_comparison_annotation.png')) + figure = ggpubr::ggarrange(figure2d1, figure2d2, ncol = 2, + labels = c('Associations per group', 'Normalized'), + common.legend = TRUE, + vjust = 1, hjust = 0, font.label = list(size = 8, color = "black", face = "bold", family = NULL) + ) + if(save_plot){ + if(pdf){ + pdf(output_path, height = 57/in2mm, width = 114/in2mm) + }else{ + png(output_path, height = 57/in2mm, width = 114/in2mm, units = 'in', res = 300) + } + print(figure) + dev.off() + } + return(figure) +} # figure 2 -figure2a_icd_manhattan_var = figure2a(save_plot = T, output_path = paste0(output, 'figure2a_icd_manhattan_filtered_var.png')) -figure2b_icd_manhattan_gene = figure2b(save_plot = T, output_path = paste0(output, paste0('figure2b_icd_manhattan_filtered_', test, '.png'))) -figure2c_var_gene_comparison_pheno = figure2cd(type = 'pheno', save_plot = T, output_path = paste0(output, 'figure2c_var_gene_comparison_pheno.png')) -figure2d_var_gene_comparison_annotation = figure2cd(type = 'gene', save_plot = T, output_path = paste0(output, 'figure2d_var_gene_comparison_annotation.png')) +figure2a_icd_manhattan_var = figure2a(save_plot = save_plot, output_path = paste0(output, 'figure2a_icd_manhattan_filtered_var.png')) +figure2b_icd_manhattan_gene = figure2b(save_plot = save_plot, output_path = paste0(output, paste0('figure2b_icd_manhattan_filtered_', test, '.png'))) +figure2c_var_gene_comparison_pheno = figure2c(save_plot = T, pdf = T, output_path = paste0(output, 'figure2c_var_gene_comparison_pheno.pdf')) +figure2d_var_gene_comparison_annotation = figure2d(save_plot = T, pdf = T, output_path = paste0(output, 'figure2d_var_gene_comparison_annotation.pdf')) + -figure2 = function(save_plot = F, output_path){ +figure2 = function(save_plot = F, save_pdf = F, output_path){ figure = ggarrange(figure2a_icd_manhattan_var, figure2b_icd_manhattan_gene, figure2c_var_gene_comparison_pheno, figure2d_var_gene_comparison_annotation, - ncol = 1, nrow = 4, labels = c('(A) Single-Variant', '(B) SKAT-O', '(C) Phenotypes', '(D) Groups'), label.args = list(gp = gpar(font = 2, cex = 1.5), vjust = 1)) + ncol = 1, nrow = 4, labels = c('(A) Single-Variant', '(B) SKAT-O', '(C) Phenotypes', '(D) Groups'), label.args = list(gp = gpar(font = 2, cex = 0.75), vjust = 1), + heights = c(0.18, 0.18, 0.32, 0.32)) if(save_plot){ png(output_path, height = 20, width = 10, units = 'in', res=300) print(figure) dev.off() } + if(save_pdf){ + pdf(output_path, height = 228/in2mm, width = 114/in2mm) + print(figure) + dev.off() + } return(figure) } -figure2(save_plot = T, output_path = paste0(output, 'figure2_', test, '.png')) - +figure2(save_plot = save_plot, save_pdf = T,output_path = paste0(output, 'final_pdf_figures/figure2_', test, '.pdf')) +# figure2(save_plot = save_plot, output_path = paste0(output, 'figure2_', test, '.png')) diff --git a/R/figure3.R b/R/figure3.R index c42c14b..d182990 100644 --- a/R/figure3.R +++ b/R/figure3.R @@ -1,6 +1,7 @@ source('~/ukb_exomes/R/constants.R') detach(package:plyr) +test = 'burden' pheno_sig_after = load_ukb_file(paste0('pheno_sig_cnt_filtered_', test, '_gene_', tranche,'.txt.bgz'), subfolder = 'analysis/') %>% select(phenocode, description, all_sig_gene_cnt) pheno_sig_after_var = load_ukb_file(paste0('pheno_sig_cnt_filtered_', test, '_var_', tranche,'.txt.bgz'), subfolder = 'analysis/') %>% @@ -31,7 +32,19 @@ figure3c = function(save_plot=F, output_path){ # scale_x_discrete(labels = c('Acceptor', 'Donor')) + scale_color_manual(name = 'Mean proportion expressed', values = c("#084594", "#9ECAE1", "#4292C6")) + scale_fill_manual(name = 'Mean proportion expressed', values = c("#084594", "#9ECAE1", "#4292C6")) + - theme_classic() + themes + theme(panel.spacing = unit(1, "lines")) + theme_classic() + themes + + theme(plot.margin = margin(0.5, 0.1, 0.1, 0.1, "cm"),, + panel.spacing = unit(1, "lines"), + strip.background = element_blank(), + strip.placement = "outside", + strip.text = element_text(size = 8, face = 'bold'), + axis.title = element_text(size = 9), + axis.text = element_text(size = 7), + axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95), + legend.key.size = unit(0.5, 'cm'), + # axis.text.x = element_text(angle = 30, vjust = 1, hjust = 0.95), + legend.title = element_text(size = 8, face = 'bold'), + legend.text = element_text(size = 7)) if(save_plot){ png(output_path, height = 4, width = 7.5, units = 'in', res = 300) print(figure) @@ -40,7 +53,7 @@ figure3c = function(save_plot=F, output_path){ return(figure) } -figure3 = function(save_plot = F, output_path){ +figure3 = function(save_plot = F, save_pdf = F, output_path){ if(tranche=='500k'){ var_sig_sum = format_sig_cnt_summary_data_500k(var_sig_after, freq_col = 'AF', sig_col = 'all_sig_pheno_cnt') figure3a = save_prop_by_annt_freq_figure_500k(var_sig_sum, output_path = paste0(output, 'figure3a_',tranche, '_', test,'_single.png'), save_plot = save_plot) @@ -69,20 +82,32 @@ figure3 = function(save_plot = F, output_path){ labels = c( 'Pathogenic/Likely Pathogenic', 'Uncertain significance', 'Benign/Likely Benign'), group_col = 'pathogenicity', x_lab = 'ClinVar Pathogenicity', output_path = paste0(output, 'figure3d_',tranche, '_', test,'.png'), save_plot = F) + facet_grid(~interval, switch = "x", scales = "free_x", space = "free_x", labeller = label_type) + - theme(panel.spacing = unit(0, "lines"), + theme(plot.margin = margin(0.5, 0.1, 0.1, 0.1, "cm"), + panel.spacing = unit(0, "lines"), strip.background = element_blank(), strip.placement = "outside", - strip.text = element_text(face = 'bold'), - axis.text= element_text(size = 10), - axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95) ) - figure3 = ggarrange(figure3a, figure3b, figure3c, figure3d, labels = c('(A) Single-Variant', '(B) Burden', '(C) Splice Variants', '(D) ClinVar Pathogenicity'), nrow=4, label.args = list(gp = gpar(font = 2, cex = 0.75), vjust = 2)) + strip.text = element_text(size = 8, face = 'bold'), + axis.title = element_text(size = 9), + axis.text = element_text(size = 7), + axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95), + legend.key.size = unit(0.5, 'cm'), + # axis.text.x = element_text(angle = 30, vjust = 1, hjust = 0.95), + legend.title = element_text(size = 8, face = 'bold'), + legend.text = element_text(size = 7)) + figure = ggarrange(figure3a, figure3b, figure3c, figure3d, labels = c('(A) Single-Variant', '(B) Burden', '(C) Splice Variants', '(D) ClinVar Pathogenicity'), nrow=4, label.args = list(gp = gpar(font = 2, cex = 0.75), vjust = 2)) if(save_plot){ png(output_path, height = 12, width = 5, units = 'in', res = 300) - print(figure3) + print(figure) dev.off() } - return(figure3) + if(save_pdf){ + pdf(output_path, height = 228/in2mm, width = 114/in2mm) + print(figure) + dev.off() + } + return(figure) } -figure3(save_plot = T, paste0(output, 'figure3_', tranche,'_', test, '.png')) +figure3(save_plot = F, save_pdf = T,output_path = paste0(output, 'final_pdf_figures/figure3_', test, '.pdf')) +# figure3(save_plot = T, paste0(output, 'figure3_', tranche,'_', test, '.png')) diff --git a/R/figure4.R b/R/figure4.R index a7712d8..476e9ff 100644 --- a/R/figure4.R +++ b/R/figure4.R @@ -8,7 +8,7 @@ gene_DD = load_ukb_file('forKonrad_sig31kDNM_consensus_genes_2021_01_12.txt', su setwd('~/gene_lists/lists/') gene_sig_after = gene_sig_after %>% filter(annotation != 'pLoF|missense|LC') -figure4 = function(save_plot = F, output_path){ +figure4 = function(save_plot = F, save_pdf = F, output_path){ universe = as.data.frame(fread('universe.tsv', quote="", header = F)) gene_universe = gene_sig_after %>% filter(gene_symbol %in% universe[, 1]) @@ -35,7 +35,7 @@ figure4 = function(save_plot = F, output_path){ gene_set_name = factor(gene_set_name, levels = names(gene_list_names)), group = factor(group, levels = c('Background', 'Test Set'))) print(matched) - matched$panel = c(rep(rep(c(1, 2), each = 6), 4)) + matched$panel = c(rep(rep(c(1, 2), each = 6), 3)) matched_test_fisher = matched %>% mutate(no_sig_cnt = cnt - sig_cnt) %>% @@ -52,13 +52,19 @@ figure4 = function(save_plot = F, output_path){ figure4_p2 = matched %>% filter(panel == 2) %>% save_subset_matched_figure2(., matched_test_fisher%>% filter(panel == 2)) figure = ggpubr::ggarrange(figure4_p1, figure4_p2, ncol = 2, common.legend = TRUE) if(save_plot){ - png(output_path, height = 10, width = 7.5, units = 'in', res = 300) + png(output_path, height = 8, width = 7.5, units = 'in', res = 300) + print(figure) + dev.off() + } + if(save_pdf){ + pdf(output_path, height = 174/in2mm, width = 174/in2mm) print(figure) dev.off() } return(figure) } -figure4(save_plot = T, output_path = paste0(output, 'figure4_gene_list_matching_', test, '.png')) +figure4(save_plot = F, save_pdf = T, output_path = paste0(output, 'final_pdf_figures/figure4_gene_list_matching_', test, '.pdf')) +# figure4(save_plot = T, output_path = paste0(output, 'figure4_gene_list_matching_', test, '.png')) diff --git a/R/figureS11.R b/R/figureS11.R index 874519d..816b29d 100644 --- a/R/figureS11.R +++ b/R/figureS11.R @@ -1,7 +1,7 @@ source('~/ukb_exomes/R/constants.R') detach(package:plyr) -figureS11 = function(result_type, save_plot = F, output_path){ +figureS11 = function(result_type, save_plot = F, save_pdf = F, output_path){ if(result_type == 'gene'){ gene_lambda = load_ukb_file(paste0('lambda_by_pheno_expected_ac_filtered_gene_', tranche,'.txt.bgz'), subfolder = 'qc/lambda_gc/') lambda = gene_lambda %>% @@ -31,6 +31,11 @@ figureS11 = function(result_type, save_plot = F, output_path){ print(figure) dev.off() } + if(save_pdf){ + pdf(output_path, height = 105/in2mm, width = 174/in2mm) + print(figure) + dev.off() + } }else{ var_lambda = load_ukb_file(paste0('lambda_by_pheno_expected_ac_var_', tranche,'.txt.bgz'), subfolder = 'qc/lambda_gc/') lambda = var_lambda %>% @@ -59,7 +64,8 @@ figureS11 = function(result_type, save_plot = F, output_path){ return(figure) } -figureS11('gene', save_plot = T, output_path = paste0(output_path, 'figureS11_lambda_expected_ac_gene.png')) +figureS11('gene', save_plot = T, save_pdf = T, output_path = paste0(output_path, 'final_pdf_figures/figureS11_lambda_expected_ac_gene.pdf')) +# figureS11('gene', save_plot = T, output_path = paste0(output_path, 'figureS11_lambda_expected_ac_gene.png')) diff --git a/R/figureS12.R b/R/figureS12.R index e179248..967e3a6 100644 --- a/R/figureS12.R +++ b/R/figureS12.R @@ -1,7 +1,7 @@ source('~/ukb_exomes/R/constants.R') detach(package:plyr) -figureS12 = function(save_plot = F, output_path){ +figureS12 = function(save_plot = F, save_pdf = F, output_path){ lambda_gene_full_before = load_ukb_file(paste0('lambda_by_pheno_full_gene_', tranche,'.txt.bgz'), subfolder = 'qc/lambda_gc/') lambda_gene_full_after = load_ukb_file(paste0('lambda_by_pheno_full_filtered_gene_', tranche,'.txt.bgz'), subfolder = 'qc/lambda_gc/') @@ -38,7 +38,15 @@ figureS12 = function(save_plot = F, output_path){ print(figure) dev.off() } + if(save_pdf){ + pdf(output_path, height = 116/in2mm, width = 174/in2mm) + print(figure) + dev.off() + } + return(figure) } -figureS12(save_plot = T, output_path = paste0(output_path, 'figureS12_lambda_gene_before_after_filter.png')) + +figureS12(save_plot = T, save_pdf = T, output_path = paste0(output_path, 'final_pdf_figures/figureS12_lambda_gene_before_after_filter.pdf')) +# figureS12(save_plot = T, output_path = paste0(output_path, 'figureS12_lambda_gene_before_after_filter.png')) diff --git a/R/figureS13.R b/R/figureS13.R index 0b5b139..e575580 100644 --- a/R/figureS13.R +++ b/R/figureS13.R @@ -1,7 +1,7 @@ source('~/ukb_exomes/R/constants.R') detach(package:plyr) -figureS13 = function(save_plot = T, output_path){ +figureS13 = function(save_plot = T, save_pdf = F, output_path){ lambda_by_gene_before_coverage = load_ukb_file(paste0('lambda_by_gene_', tranche,'.txt.bgz'), subfolder = 'qc/lambda_gc/') if(tranche == '500k'){ lambda_by_gene_before_coverage = lambda_by_gene_before_coverage %>% @@ -46,7 +46,16 @@ figureS13 = function(save_plot = T, output_path){ print(figure) dev.off() } + if(save_pdf){ + ggsave(filename=output_path, plot=figure, device=cairo_pdf, + width = 174/in2mm, height = 232/in2mm, units = "in", dpi = 300) + # pdf(output_path, height = 232/in2mm, width = 174/in2mm) + # print(figure) + # dev.off() + } return(figure) } -figureS13(save_plot = T, output_path = paste0(path_to, 'figureS13_lambda_by_gene_by_coverage.png')) + +figureS13(save_plot = T, save_pdf = T, output_path = paste0(path_to, 'final_pdf_figures/figureS13_lambda_by_gene_by_coverage.pdf')) +# figureS13(save_plot = T, output_path = paste0(path_to, 'figureS13_lambda_by_gene_by_coverage.png')) diff --git a/R/figureS14.R b/R/figureS14.R index bf6f367..a88680e 100644 --- a/R/figureS14.R +++ b/R/figureS14.R @@ -2,7 +2,7 @@ source('~/ukb_exomes/R/constants.R') detach(package:plyr) -figureS14 = function(save_plot = F, output_path){ +figureS14 = function(save_plot = F, save_pdf = F, output_path){ lambda_gene_annt = load_ukb_file(paste0('lambda_by_pheno_annt_filtered_gene_', tranche,'.txt.bgz'), subfolder = 'qc/lambda_gc/') lambda_gene_annt = lambda_gene_annt %>% @@ -32,7 +32,14 @@ figureS14 = function(save_plot = F, output_path){ print(figure) dev.off() } + if(save_pdf){ + pdf(output_path, height = 116/in2mm, width = 174/in2mm) + print(figure) + dev.off() + } return(figure) } -figureS14(save_plot = T, output_path = paste0(output_path, 'figureS14_lambda_dist_by_pheno_filtered_density.png')) + +figureS14(save_plot = T, save_pdf = T, output_path = paste0(output_path, 'final_pdf_figures/figureS14_lambda_dist_by_pheno_filtered_density.pdf')) +# figureS14(save_plot = T, output_path = paste0(output_path, 'figureS14_lambda_dist_by_pheno_filtered_density.png')) diff --git a/R/figureS15.R b/R/figureS15.R index f7abee8..4d6f8f3 100644 --- a/R/figureS15.R +++ b/R/figureS15.R @@ -1,7 +1,7 @@ source('~/ukb_exomes/R/constants.R') detach(package:plyr) -figureS15 = function(save_plot = F, output_path){ +figureS15 = function(save_plot = F, save_pdf = F, output_path){ lambda_by_gene = load_ukb_file(paste0('lambda_by_gene_filtered_', tranche,'.txt.bgz'), subfolder = 'qc/lambda_gc/') if(tranche == '500k'){ lambda_by_gene = lambda_by_gene %>% @@ -35,7 +35,13 @@ figureS15 = function(save_plot = F, output_path){ print(figure) dev.off() } + if(save_pdf){ + pdf(output_path, height = 116/in2mm, width = 174/in2mm) + print(figure) + dev.off() + } return(figure) } -figureS15(save_plot = T, output_path = paste0(output_path, 'figureS15_lambda_dist_by_gene_filtered.png')) +figureS15(save_plot = T, save_pdf = T, output_path = paste0(output_path, 'final_pdf_figures/figureS15_lambda_dist_by_gene_filtered.pdf')) +# figureS15(save_plot = T, output_path = paste0(output_path, 'figureS15_lambda_dist_by_gene_filtered.png')) diff --git a/R/figureS16.R b/R/figureS16.R index d2a38b3..e071bd0 100644 --- a/R/figureS16.R +++ b/R/figureS16.R @@ -1,8 +1,7 @@ source('~/ukb_exomes/R/constants.R') detach(package:plyr) -output_path = '~/Desktop/final_figures/' -figureS16 = function(save_plot = F, filter = T, output_path){ +figureS16 = function(save_plot = F, save_pdf = F, filter = T, output_path){ corr = load_ukb_file(paste0('pheno_correlation_before_filter_', tranche,'.txt.bgz'), subfolder = 'qc/') count = data.frame( # pickle.load(open('pheno_correlated_cnt_list_500k', 'rb')) @@ -16,13 +15,16 @@ figureS16 = function(save_plot = F, filter = T, output_path){ geom_histogram(binwidth = 0.01, alpha = 0.7) + theme_classic() + scale_y_log10(label = comma) + scale_x_continuous(breaks = seq(0, 1, 0.1))+ labs(y = 'Phenotype pairs', x = expression(bold(paste('Correlation (r'^2, ')')))) + - theme(axis.title.x = element_text(family = "Arial", face = 'bold'))+ - trait_color_scale + trait_fill_scale + themes + theme(axis.title.x = element_text(face = 'bold'))+ + trait_color_scale + trait_fill_scale + themes + + theme(axis.title = element_text(size = 8, face = 'bold'),) figureS16b = count %>% ggplot + aes(x = factor(Corr), y = Count) + geom_bar(stat = "identity", alpha = 0.7) + theme_classic() + + scale_y_continuous(limits = c(0, 1600))+ labs(y = 'Phenotypes removed', x = 'Correlation threshold') + - geom_text(aes(label = Count), vjust = -0.3, size = 2.5) + themes + geom_text(aes(label = Count), vjust = -0.3, size = 2.5) + themes + + theme(axis.title = element_text(size = 8, face = 'bold'),) figure = ggarrange(figureS16a, figureS16b, labels = c('A', 'B'), nrow = 2, label.args = list(gp = gpar(font = 2, cex = 0.75), vjust = 2)) if(save_plot){ @@ -30,8 +32,14 @@ figureS16 = function(save_plot = F, filter = T, output_path){ print(figure) dev.off() } + if(save_pdf){ + pdf(output_path, height = 85/in2mm, width = 85/in2mm) + print(figure) + dev.off() + } return(figure) } -figureS16(save_plot = T, filter = F, output_path = paste0(output_path, 'figureS16_pheno_corr_count.png')) +figureS16(save_plot = T, save_pdf = T, filter = F, output_path = paste0(output_path, 'final_pdf_figures/figureS16_pheno_corr_count.pdf')) +# figureS16(save_plot = T, filter = F, output_path = paste0(output_path, 'figureS16_pheno_corr_count.png')) diff --git a/R/figureS17.R b/R/figureS17.R index b481c52..78f2863 100644 --- a/R/figureS17.R +++ b/R/figureS17.R @@ -1,8 +1,7 @@ source('~/ukb_exomes/R/constants.R') detach(package:plyr) -output_path = '~/Desktop/final_figures/' -figureS17 = function(type = 'full', save_plot = F, output_path){ +figureS17 = function(type = 'full', save_plot = F, save_pdf = F, output_path){ height_UKB = load_ukb_file('height_beta_500k.txt.bgz', subfolder = 'analysis/', force_cols = cols(locus = col_character())) %>% separate(markerID, c("chrom", "position", "REF", "ALT"), sep = "[^[:alnum:]]") %>% mutate(locus = new_locus) @@ -35,8 +34,14 @@ figureS17 = function(type = 'full', save_plot = F, output_path){ print(figure) dev.off() } + if(save_pdf){ + pdf(output_path, height = 85/in2mm, width = 85/in2mm) + print(figure) + dev.off() + } return(figure) } -figureS17(type = 'full', save_plot = T, output_path = paste0(output_path, 'figureS17_giant_height_comparison_full.png')) -figureS17(type = 'sub', save_plot = T, output_path = paste0(output_path, 'figureS17_giant_height_comparison_sub.png')) \ No newline at end of file +figureS17(type = 'sub', save_plot = T, save_pdf = T, output_path = paste0(output_path, 'final_pdf_figures/figureS17_giant_height_comparison_sub.pdf')) +# figureS17(type = 'full', save_plot = T, save_pdf = T, output_path = paste0(output_path, 'final_pdf_figures/figureS17_giant_height_comparison_full.pdf')) +# figureS17(type = 'sub', save_plot = T, output_path = paste0(output_path, 'figureS17_giant_height_comparison_sub.png')) \ No newline at end of file diff --git a/R/figureS18.R b/R/figureS18.R index 21c1102..c676c2c 100644 --- a/R/figureS18.R +++ b/R/figureS18.R @@ -9,7 +9,7 @@ if(tranche=='500k'){ filter(annotation != 'pLoF|missense|LC') } -figureS18 = function(save_plot=F, output_path){ +figureS18 = function(save_plot=F, save_pdf = F, output_path){ sum = rbind( gene_sig_after %>% group_by(annotation) %>% sig_cnt_summary('all_sig_pheno_cnt') %>% mutate(type = "SKAT-O"), @@ -39,6 +39,12 @@ figureS18 = function(save_plot=F, output_path){ print(figure) dev.off() } + if(save_pdf){ + pdf(output_path, height = 70/in2mm, width = 114/in2mm) + print(figure) + dev.off() + } return(figure) } -figureS18(save_plot = T, paste0(output_path, 'figureS18_proportion_no_freq_matching_',tranche, '_', test,'.png')) \ No newline at end of file +figureS18(save_plot = T, save_pdf = T, paste0(output_path, 'final_pdf_figures/figureS18_proportion_no_freq_matching_',tranche, '_', test,'.pdf')) +# figureS18(save_plot = T, paste0(output_path, 'figureS18_proportion_no_freq_matching_',tranche, '_', test,'.png')) \ No newline at end of file diff --git a/R/figureS19.R b/R/figureS19.R index b3c7209..8cd1b10 100644 --- a/R/figureS19.R +++ b/R/figureS19.R @@ -25,10 +25,14 @@ save_prop_by_annt_freq_figure_500k_supp = function(matched_summary, type, output strip.background = element_blank(), strip.placement = "outside", strip.text = element_text(face = 'bold'), - axis.text= element_text(size = 10), - axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95) ) + + axis.text= element_text(size = 8), + axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95), + legend.key.size = unit(0.5, 'cm'), + axis.title.y = element_text(size = 9, face = 'bold'), + legend.title = element_text(size = 8, face = 'bold'), + legend.text = element_text(size = 7),) + # scale_x_discrete(labels = c(matched_summary$labels)) + - geom_text(aes(y = 0.002, label=paste(cnt, type), vjust = -1), size = 2) + geom_text(aes(y = 0.002, label=paste(cnt, type), vjust = -1), size = 1.9) if(save_plot){ png(output_path, height = 4, width = 7.5, units = 'in', res = 300) print(plt) @@ -37,7 +41,7 @@ save_prop_by_annt_freq_figure_500k_supp = function(matched_summary, type, output return(plt) } -figureS19 <- function(save_plot=F, output_path){ +figureS19 <- function(save_plot=F, save_pdf = F, output_path){ if(tranche=='500k'){ gene_sig_sum = format_sig_cnt_summary_data_500k(gene_sig_after, freq_col = 'CAF', sig_col = 'all_sig_pheno_cnt') var_sig_sum = format_sig_cnt_summary_data_500k(var_sig_after, freq_col = 'AF', sig_col = 'all_sig_pheno_cnt') @@ -54,7 +58,16 @@ figureS19 <- function(save_plot=F, output_path){ print(figure) dev.off() } + if(save_pdf){ + ggsave(filename=output_path, plot=figure, device=cairo_pdf, + width = 114/in2mm, height = 140/in2mm, units = "in", dpi = 300) + # pdf(output_path, height = 140/in2mm, width = 114/in2mm) + # print(figure) + # dev.off() + } return(figure) } -figureS19(save_plot = T, paste0(output_path, 'figureS19_common_variant_gene_',tranche, '_', test,'.png')) +figureS19(save_plot = T, save_pdf = T, paste0(output_path, 'final_pdf_figures/figureS19_common_variant_gene_',tranche, '_', test,'.pdf')) +# figureS19(save_plot = T, paste0(output_path, 'figureS19_common_variant_gene_',tranche, '_', test,'.png')) + diff --git a/R/figureS20.R b/R/figureS20.R index 2a538be..c72b3ea 100644 --- a/R/figureS20.R +++ b/R/figureS20.R @@ -3,7 +3,7 @@ detach(package:plyr) var_sig_after = load_ukb_file(paste0('var_sig_cnt_filtered_', test, '_', tranche,'.txt.bgz'), subfolder = 'analysis/' ,force_cols = var_cols) -figureS20 = function(save_plot = F, output_path){ +figureS20 = function(save_plot = F, save_pdf = F, output_path){ polyphen_sum = var_sig_after %>% filter(AF <= 0.1 & AF > 0.0001) %>% filter(!is.na(polyphen2) & polyphen2 != 'unknown') %>% @@ -43,7 +43,9 @@ figureS20 = function(save_plot = F, output_path){ strip.placement = "outside", strip.text = element_text(face = 'bold'), axis.text= element_text(size = 9), - axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95) ) + + axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95), + legend.title = element_text(size = 8, face = 'bold'), + legend.text = element_text(size = 7),) + geom_segment(data = annt_sig, size = .6, aes(x = x, y = y, xend = xend, yend = y)) + geom_text(data = annt_sig, aes(x = (x+xend)/2, y = y, label = sig_label), size = 6) @@ -52,7 +54,13 @@ figureS20 = function(save_plot = F, output_path){ print(figure) dev.off() } + if(save_pdf){ + pdf(output_path, height = 90/in2mm, width = 114/in2mm) + print(figure) + dev.off() + } return(figure) } -figureS20(save_plot = T, output_path = paste0(output, 'figureS20_polyphen_',tranche, '_', test,'.png')) \ No newline at end of file +figureS20(save_plot = T, save_pdf = T, output_path = paste0(output, 'final_pdf_figures/figureS20_polyphen_',tranche, '_', test,'.pdf')) +# figureS20(save_plot = T, output_path = paste0(output, 'figureS20_polyphen_',tranche, '_', test,'.png')) \ No newline at end of file diff --git a/R/figureS8.R b/R/figureS8.R index 0bd1371..03e7a6b 100644 --- a/R/figureS8.R +++ b/R/figureS8.R @@ -6,7 +6,7 @@ rp_gene_p_value_skato = load_ukb_file('rp_gene_p_value_skato_300k.txt.bgz', sub rp_gene_p_value_burden = load_ukb_file('rp_gene_p_value_burden_300k.txt.bgz', subfolder = 'analysis/') rp_var_p_value = load_ukb_file('rp_var_p_value300k.txt.bgz', subfolder = 'analysis/') -figureS8 = function(save_plot = F, direction = 'long', output_path){ +figureS8 = function(save_plot = F, save_pdf = F, direction = 'long', output_path){ var_p_value = format_random_pheno_p_data(rp_var_p_value, test_type = 'Single-Variant') gene_p_value_skato = format_random_pheno_p_data(rp_gene_p_value_skato, test_type = 'SKAT-O') gene_p_value_burden = format_random_pheno_p_data(rp_gene_p_value_burden, test_type = 'Burden Test') @@ -59,10 +59,16 @@ figureS8 = function(save_plot = F, direction = 'long', output_path){ print(figure) dev.off() } + if(save_pdf){ + pdf(output_path, height = 232/in2mm, width = 174/in2mm) + print(figure) + dev.off() + } return(figure) } -figureS8(save_plot = T, output_path = paste0(output_path, 'figureS8_random_pheno_qq_plot.png')) +figureS8(save_plot = F, save_pdf = T, output_path = paste0(output_path, 'final_pdf_figures/figureS8_random_pheno_qq_plot.pdf')) +# figureS8(save_plot = T, output_path = paste0(output_path, 'figureS8_random_pheno_qq_plot.png')) diff --git a/R/figureS9.R b/R/figureS9.R index 0d4e6e3..daf06db 100644 --- a/R/figureS9.R +++ b/R/figureS9.R @@ -1,8 +1,7 @@ source('~/ukb_exomes/R/constants.R') detach(package:plyr) -output_path = '~/Desktop/final_figures/' -figureS9 = function(save_plot = F, output_path){ +figureS9 = function(save_plot = F, save_pdf = F, output_path){ # Filtered n_var<2 & coverage<20 rp_lambda_gene_caf = load_ukb_file('rp_lambda_gene_caf_300k.txt.bgz', subfolder='analysis/') rp_lambda_gene_caf = rp_lambda_gene_caf %>% @@ -21,15 +20,31 @@ figureS9 = function(save_plot = F, output_path){ geom_hline(yintercept = 1, lty = 2, size = 0.25) + scale_x_log10(label = comma) + trait_color_scale + trait_fill_scale + - facet_grid(heritability ~ caf, scales = 'free', labeller = label_type) + themes + facet_grid(heritability ~ caf, scales = 'free', labeller = label_type) + themes + + theme(strip.text = element_text(size = 8), + plot.margin = margin(0.5, 0.1, 0.1, 0.1, "cm"), + axis.text.x = element_text(size = 8, angle = 30, vjust = 0.8), + axis.text.y = element_text(size = 8), + legend.key.size = unit(0.5, 'cm'), + axis.title.y = element_text(size = 9, face = 'bold'), + legend.title = element_text(size = 8, face = 'bold'), + legend.text = element_text(size = 7), ) if(save_plot){ png(output_path, height = 5, width = 9, units = 'in', res = 300) print(figure) dev.off() } + if(save_pdf){ + ggsave(filename=output_path, plot=figure, device=cairo_pdf, + width = 174/in2mm, height = 116/in2mm, units = "in", dpi = 300) + # pdf(output_path, height = 116/in2mm, width = 174/in2mm) + # print(figure) + # dev.off() + } return(figure) } -figureS9(save_plot = T, output_path = paste0(output_path, 'figureS9_random_pheno_lambda_gene_by_caf_h2_skato.png')) +figureS9(save_plot = F, save_pdf = T, output_path = paste0(output_path, 'final_pdf_figures/figureS9_random_pheno_lambda_gene_by_caf_h2.pdf')) +# figureS9(save_plot = T, output_path = paste0(output_path, 'figureS9_random_pheno_lambda_gene_by_caf_h2_skato.png'))