Skip to content

Commit

Permalink
Merge pull request #61 from Nealelab/pipeline
Browse files Browse the repository at this point in the history
create PDFs
  • Loading branch information
wlu04 authored May 4, 2022
2 parents e956e9b + 18b7120 commit 0f73ee0
Show file tree
Hide file tree
Showing 17 changed files with 346 additions and 106 deletions.
57 changes: 34 additions & 23 deletions R/constants.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ source('~/ukbb_pan_ancestry/constants.R')

tranche = '500k'
test = 'skato'
path_to = '~/Desktop/ukb_exomes_450k/'
output = '~/Desktop/ukb_exomes_450k/'
output_path = '~/Desktop/ukb_exomes_450k/'
path_to = '~/Dropbox (Partners HealthCare)/analysis/ukb_exomes_450k/'
output = '~/Dropbox (Partners HealthCare)/analysis/ukb_exomes_450k/'
output_path = '~/Dropbox (Partners HealthCare)/analysis/ukb_exomes_450k/'
in2mm = 25.4



Expand All @@ -44,7 +45,8 @@ caf_names = c(paste0('CAF:[0 , 0.0001]'), 'CAF:(0.0001, 0.001]', 'CAF:(0.001, 0.
ac_types = c('(0, 5]', '(5, 50]', '(50, 500]', '(500, 5000]', '(5000, 50000]', '(50000, )')
ac_names = c('(0, 5]', '(5, 50]', '(50, 500]', '(500, 5000]', '(5000, 50000]', paste0('(50000, ', bquote("\U221E"), ' )'))
gene_list_names = c('Constrained' = 'Constrained', 'Developmental Delay' = 'Developmental\nDelay', 'all_ad' = 'Autosomal dominant', 'all_ar' = 'Autosomal recessive',
'fda_approved_drug_targets' = 'FDA approved\ndrug targets', 'gwascatalog' = 'GWAS catalog', 'CEGv2_subset_universe' = 'Cell essential', 'NEGv1_subset_universe' = 'Cell non-essential')
'fda_approved_drug_targets' = 'FDA approved\ndrug targets', 'gwascatalog' = 'GWAS catalog')
#, 'CEGv2_subset_universe' = 'Cell essential', 'NEGv1_subset_universe' = 'Cell non-essential')
icd_names = c('A' = 'Infectious', 'B' = 'Infectious','C' = 'Neoplasms', 'D' = 'Blood/immune', 'E' = 'Endocrine/metabolic', 'F' = 'Mental/behavioral', 'G' = 'Nervous', 'H1' = 'Eye',
'H2' = 'Ear', 'I' = 'Circulatory', 'J' = 'Respiratory', 'K' = 'Digestive', 'L' = 'Skin/subcutaneous', 'M' = 'Musculoskeletal', 'N' = 'Genitourinary', 'O' = 'Pregnancy',
'P' = 'Perinatal', 'Q' = 'Congenital', 'R' = 'Symptoms', 'S' = 'Injury/poison', 'T' = 'Injury/poison', 'V' = 'External causes', 'Y' = 'External causes', 'Z' = 'Health Factors')
Expand Down Expand Up @@ -246,7 +248,7 @@ save_group_matched_figure2 = function(matched_summary, levels, labels, group_col
geom_pointrange(stat = "identity", position = position_dodge(width = 2)) +
labs(y = 'Proportion', x = x_lab) +
scale_color_manual(name = x_lab, values = c("#D95F02", "#7570B3", "#1B9E77")) +
scale_y_continuous(label = label_percent(accuracy = 1)) +
scale_y_continuous(label = label_percent(accuracy = 1), limits = c(0, 1.05)) +
scale_x_discrete(labels = levels) +
theme_classic() + themes +
facet_grid(~interval) +
Expand Down Expand Up @@ -561,20 +563,21 @@ save_icd_manhattan_figure = function(data, p_filter = 1e-2, width = 10, spacing
icd_mh_plt = data %>%
filter(min_p < p_filter) %>%
ggplot + aes(x = pos, y = -log10(min_p), color = icd10) +
geom_point(size = 0.75) +
geom_point_rast(size = 0.75) +
geom_hline(yintercept = -log10(sig_level), lty = 2)+
scale_x_continuous(breaks = ctr, labels = icd_names[icd_labels]) +
scale_color_manual(values = icd_colors) +
scale_y_continuous(trans = gwas_loglog_trans(), breaks = loglog_breaks, name = expression(bold(paste('-log'[10], '(', italic(p), ')'))))+
# scale_y_continuous(trans = gwas_loglog_trans(), breaks = loglog_breaks, name = expression(bold(paste('-log'[10], '(', italic(p), ')'))))+
scale_y_continuous(trans = gwas_loglog_trans(), breaks = c(seq(2, 10, 2), 20, 50, 100, 200, 400), name = expression(bold(paste('-log'[10], '(', italic(p), ')'))))+
labs(x = NULL, y = expression(bold(paste('-log'[10], '(', italic(p), ')')))) +
theme_classic() + themes +
theme(legend.position = 'none',
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95, size = 12),
axis.text.y = element_text(size = 12),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95, size = 7),
axis.text.y = element_text(size = 7),
plot.margin = margin(0.8, 0.1, 0.1, 0.1, "cm"),
axis.title = element_text(size = 14),
legend.title = element_text(size = 12, face = 'bold'),
legend.text = element_text(size = 12), )
axis.title = element_text(size = 8),
legend.title = element_text(size = 8, face = 'bold'),
legend.text = element_text(size = 8), )
if(save_plot){
png(output_path, height = 4, width = 6, units = 'in', res = 300)
print(icd_mh_plt)
Expand Down Expand Up @@ -619,9 +622,14 @@ save_prop_by_annt_freq_figure_500k = function(matched_summary, output_path, save
theme(panel.spacing = unit(0, "lines"),
strip.background = element_blank(),
strip.placement = "outside",
strip.text = element_text(face = 'bold'),
axis.text= element_text(size = 10),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95) )
strip.text = element_text(size = 8, face = 'bold'),
axis.title = element_text(size = 9),
axis.text = element_text(size = 7),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 0.95),
legend.key.size = unit(0.5, 'cm'),
# axis.text.x = element_text(angle = 30, vjust = 1, hjust = 0.95),
legend.title = element_text(size = 8, face = 'bold'),
legend.text = element_text(size = 7))
if(save_plot){
png(output_path, height = 3, width = 5, units = 'in', res = 300)
print(plt)
Expand All @@ -639,11 +647,12 @@ save_count_barplot_figure = function(cnt_data, cnt_type, save_plot = F, output_p
scale_alpha_discrete(range = c(0.5, 1)) +
labs(y = paste('Number of', cnt_type), x = NULL, alpha = NULL) +
annotation_color_scale + annotation_fill_scale + themes +
geom_text(aes(label = cnt), vjust = -0.3, size = 3, position = position_dodge(width = 1), color = '#187bcd') +
geom_text(aes(label = cnt), vjust = -0.1, size = 2, position = position_dodge(width = 1), color = '#187bcd') +
theme_classic() + themes + theme(legend.position = 'none')+
theme(plot.margin = margin(0.5, 0.1, 0.1, 0.1, "cm"),
axis.title = element_text(size = 10),
axis.text = element_text(size = 10),
theme(plot.margin = margin(0.5, 0.1, 0.1, 0.1, "cm"),
axis.title = element_text(size = 8),
axis.text = element_text(size = 7),
#axis.text.x = element_text(angle = 30, vjust = 1, hjust = 0.95),
legend.title = element_text(size = 8, face = 'bold'),
legend.text = element_text(size = 8), )
if(save_plot){
Expand All @@ -666,13 +675,15 @@ save_count_by_freq_figure = function(cnt_data, type, save_plot = F, output_path)
scale_y_continuous(label = comma) +
labs(y = paste('Number of', type), x = paste(freq, 'Interval'))+
annotation_color_scale + annotation_fill_scale + themes +
geom_text(aes(label = cnt, color = annotation), vjust = -0.3, size = 2, position = position_dodge(width = 1)) +
geom_text(aes(label = cnt, color = annotation), vjust = -0.1, size = 2, position = position_dodge(width = 1)) +
theme_classic() + themes +
theme(plot.margin = margin(0.5, 0.1, 0.1, 0.1, "cm"),
axis.title = element_text(size = 10),
axis.text.x = element_text(angle = 30, vjust = 1, hjust = 0.95),
axis.title = element_text(size = 8),
axis.text = element_text(size = 7),
legend.key.size = unit(0.5, 'cm'),
# axis.text.x = element_text(angle = 30, vjust = 1, hjust = 0.95),
legend.title = element_text(size = 8, face = 'bold'),
legend.text = element_text(size = 8))
legend.text = element_text(size = 7))
if(save_plot){
png(output_path, height = 4, width = 6, units = 'in', res = 300)
print(plt)
Expand Down
24 changes: 17 additions & 7 deletions R/figure1.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
source('~/ukb_exomes/R/constants.R')
detach(package:plyr)
#install.packages("Cairo")
library(Cairo)

gene = load_ukb_file(paste0('gene_qc_metrics_ukb_exomes_', tranche,'.txt.bgz'), subfolder = 'qc/')
var = load_ukb_file(paste0('variant_qc_metrics_ukb_exomes_', tranche,'.txt.bgz'), subfolder = 'qc/')
pheno = load_ukb_file(paste0('pheno_qc_metrics_ukb_exomes_', tranche,'.txt.bgz'), subfolder = 'qc/')

figure1 = function(save_plot = F, output_path){
figure1 = function(save_plot = F, save_pdf = F,output_path){
figure1_cnt = data.frame(
type = rep(c('Variants', 'Groups', 'Phenotypes'), 2),
cnt = as.integer(c(nrow(var), nrow(gene), nrow(pheno),
Expand All @@ -15,9 +17,9 @@ figure1 = function(save_plot = F, output_path){
filter = rep(c('Before Filtering', 'After Filtering'), each = 3)
) %>% mutate(filter = factor(filter, levels = c('Before Filtering', 'After Filtering')))

figure1a = save_count_barplot_figure(cnt_data = figure1_cnt, cnt_type = 'Phenotypes', save_plot = T, output_path = paste0(output, 'figure1a_pheno_cnt.png'))
figure1b = save_count_barplot_figure(cnt_data = figure1_cnt, cnt_type = 'Variants', save_plot = T, output_path = paste0(output, 'figure1b_var_cnt.png'))
figure1c = save_count_barplot_figure(cnt_data = figure1_cnt, cnt_type = 'Groups', save_plot = T, output_path = paste0(output, 'figure1c_gene_cnt.png'))
figure1a = save_count_barplot_figure(cnt_data = figure1_cnt, cnt_type = 'Phenotypes', save_plot = save_plot, output_path = paste0(output, 'figure1a_pheno_cnt.png'))
figure1b = save_count_barplot_figure(cnt_data = figure1_cnt, cnt_type = 'Variants', save_plot = save_plot, output_path = paste0(output, 'figure1b_var_cnt.png'))
figure1c = save_count_barplot_figure(cnt_data = figure1_cnt, cnt_type = 'Groups', save_plot = save_plot, output_path = paste0(output, 'figure1c_gene_cnt.png'))

gene_cnt_by_freq = gene %>%
filter(keep_gene_expected_ac & keep_gene_coverage & keep_gene_n_var & get(paste0('keep_gene_',test))) %>%
Expand All @@ -27,8 +29,8 @@ figure1 = function(save_plot = F, output_path){
mutate(annotation = if_else(annotation %in% c('missense', 'LC'), 'missense|LC', annotation)) %>%
format_count_by_freq_data(freq_col = 'AF')

figure1d = save_count_by_freq_figure(cnt_data = var_cnt_by_freq, type = 'Variants', save_plot = T, output_path = paste0(output, 'figure1d_var_cnt_by_AF.png'))
figure1e = save_count_by_freq_figure(cnt_data = gene_cnt_by_freq, type = 'Groups', save_plot = T, output_path = paste0(output, 'figure1e_gene_cnt_by_CAF.png'))
figure1d = save_count_by_freq_figure(cnt_data = var_cnt_by_freq, type = 'Variants', save_plot = save_plot, output_path = paste0(output, 'figure1d_var_cnt_by_AF.png'))
figure1e = save_count_by_freq_figure(cnt_data = gene_cnt_by_freq, type = 'Groups', save_plot = save_plot, output_path = paste0(output, 'figure1e_gene_cnt_by_CAF.png'))

figure1_top = egg::ggarrange(figure1a, figure1b, figure1c, labels = c('(A) Phenotypes', '(B) Variants', '(C) Groups'), ncol = 3,
label.args = list(gp = gpar(font = 2, cex = 0.75), vjust = 1.5, hjust = 0))
Expand All @@ -40,7 +42,15 @@ figure1 = function(save_plot = F, output_path){
print(figure)
dev.off()
}
if(save_pdf){
ggsave(filename=output_path, plot=figure, device=cairo_pdf,
width = 174/in2mm, height = 130/in2mm, units = "in", dpi = 300)
# cairo_pdf(output_path, height = 130/in2mm, width = 174/in2mm, family="NimbusSan")
# print(figure)
# dev.off()
}
return(figure)
}

figure1(save_plot = T, output_path = paste0(output, 'figure1_', test, '.png'))
figure1(save_pdf = T, output_path = paste0(output, 'final_pdf_figures/figure1_', test, '.pdf'))
# figure1(save_plot = T, output_path = paste0(output, 'figure1_', test, '.png'))
Loading

0 comments on commit 0f73ee0

Please sign in to comment.