-
Notifications
You must be signed in to change notification settings - Fork 3
/
f - run_linear_regression.R
129 lines (107 loc) · 5.81 KB
/
f - run_linear_regression.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
######################################### Adjusted Linear Regression ########################################
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Purpose: This function runs the adjusted linear regression for all chemicals and all immune measures
#
# Inputs: long_nhanes_subset - long dataframe containing complete demographic and cognitive data for each
# participant
# conversion - dataframe of chemical names, codenames, and families
#
# Outputs: model_stats_adjusted - dataframe of linear regression outputs
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
run_linear_regression <- function(long_nhanes_subset,
conversion,
weights_dataset,
chem_master,
nhanes_subset)
{
library(tidyverse)
library(beepr)
#TEMPORARY
# long_nhanes_subset <- long_nhanes_subset_dataset
# conversion <- use_these_chems
long_nhanes_subset$chemical_codename <- as.character(long_nhanes_subset$chemical_codename)
long_nhanes_subset <- long_nhanes_subset %>%
mutate(chem_copy = chemical_codename)
#run linear regressions
# print("run reg")
df_regressions_i <- long_nhanes_subset %>%
# filter(chemical_codename %in% c("URX24D"
# # , "LBXBPB"
# )) %>%
# filter(celltype_codename %in% c(#"LBXBAPCT",
# # "LBDLYMNO",
# "LBDMONO"
# # "LBDNENO",
# # "LBDEONO",
# # "LBDBANO"
# )) %>%
group_by(celltype_codename, chemical_codename) %>%
do(run_if_else_glm_weighted(.,
weights_dataset,
chem_master,
nhanes_subset)) %>% #the dot is each group by chunk - pulling from chemical_immune_chunk
ungroup(.)
# View(df_regressions_i)
#############################################################################################################
############################################## RENAME CHEMICALS #############################################
#############################################################################################################
conversion_subset <- conversion %>%
dplyr::select(chemical_codename_use,
chem_family,
chemical_name) %>%
distinct(chemical_codename_use, .keep_all = TRUE) %>%
rename(chemical_codename = chemical_codename_use)
#merge in the chemical_names
df_regressions_names <- left_join(df_regressions_i, conversion_subset, by = "chemical_codename")
df_regressions_names <- df_regressions_names %>%
mutate(immune_measure = case_when(
celltype_codename == "LBDLYMNO" ~ "Lymphocyte (1000 cells/uL)",
celltype_codename == "LBDMONO" ~ "Monocyte (1000 cells/uL)",
celltype_codename == "LBDNENO" ~ "Neutrophils (1000 cells/uL)",
celltype_codename == "LBDEONO" ~ "Eosinophils (1000 cells/uL)",
celltype_codename == "LBDBANO" ~ "Basophils (1000 cells/uL)",
celltype_codename == "LBXWBCSI" ~ "WBC (1000 cells/uL)",
celltype_codename == "LBXRBCSI" ~ "RBC (million cells/uL)",
celltype_codename == "LBXMCVSI" ~ "Mean Corpuscular Volume (fL)"))
# View(df_regressions_names)
#############################################################################################################
###################################### ADD CONFIDENCE INTERVALS AND FDR #####################################
#############################################################################################################
#calculate the unscaled confidence intervals
z_score <- 1.96
model_stats_CI_unscaled <- df_regressions_names %>%
mutate(lower.CI = estimate - (z_score*std.error),
upper.CI = estimate + (z_score*std.error))
model_stats_CI_unscaled_chem_other <- model_stats_CI_unscaled %>%
filter(term != "chem_log_measurement")
model_stats_CI_unscaled_chem <- model_stats_CI_unscaled %>%
filter(term == "chem_log_measurement") %>%
group_by(celltype_codename) %>%
mutate(FDR = p.adjust(p.value, method = "BY")) %>%
ungroup()
model_stats_CI_unscaled_updated <- full_join(model_stats_CI_unscaled_chem
, model_stats_CI_unscaled_chem_other
, by = NULL) %>%
arrange(celltype_codename
, chemical_codename)
# View(model_stats_CI_unscaled_updated)
model_stats_adjusted <- model_stats_CI_unscaled_updated %>%
dplyr::select(chemical_name,
chemical_codename,
chem_family,
immune_measure,
celltype_codename,
term,
estimate,
std.error,
lower.CI,
upper.CI,
statistic,
p.value,
FDR,
nobs)
# View(model_stats_adjusted)
beep()
return(model_stats_adjusted)
}