-
Notifications
You must be signed in to change notification settings - Fork 0
/
6_averages.R
177 lines (148 loc) · 7.22 KB
/
6_averages.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
## ------------------------------------------ ##
# SPARC Soil P -- Calculating Averages
## ------------------------------------------ ##
# Script author(s): Nick J Lyon
# Purpose:
## Take all 'stats ready' files and create plot- and site-level averages
## ------------------------------------------ ##
# Housekeeping -----
## ------------------------------------------ ##
# Load necessary libraries
# install.packages("librarian")
librarian::shelf(tidyverse, googledrive)

# Create necessary sub-folder(s)
## `recursive = TRUE` also creates the parent "data" folder when it is missing
## (a non-recursive call fails in that case, and the warning is suppressed here)
dir.create(path = file.path("data", "stats_ready"),
           showWarnings = FALSE, recursive = TRUE)
dir.create(path = file.path("data", "averages"),
           showWarnings = FALSE, recursive = TRUE)

# Clear environment
## NOTE(review): this removes every object in the calling session's workspace;
## kept so the script's behavior is unchanged, but consider dropping it
rm(list = ls())

# Identify needed tidy file(s)
tidy_drive <- googledrive::as_id("https://drive.google.com/drive/u/0/folders/1pjgN-wRlec65NDLBvryibifyx6k9Iqy9")

# Identify all files in that folder that are 'stats-ready'
ready_files <- googledrive::drive_ls(path = tidy_drive) %>%
  dplyr::filter(stringr::str_detect(string = name, pattern = "_stats-ready_"))

# Download those files locally (replacing any earlier local copy)
purrr::walk2(.x = ready_files$id, .y = ready_files$name,
             .f = ~ googledrive::drive_download(
               file = .x, overwrite = TRUE,
               path = file.path("data", "stats_ready", .y)))

# Identify those files locally (outer parentheses print the result)
## Only keep names containing "_stats-ready_": the averaging step builds its
## output names by swapping that tag, so any other file would yield identical
## site- and plot-level file names and the second export would clobber the first
( local_files <- dir(path = file.path("data", "stats_ready"),
                     pattern = "_stats-ready_") )
## ------------------------------------------ ##
# Averaging Steps ----
## ------------------------------------------ ##
# Helper: average the output of a finer level (e.g., plots) up to a coarser one
## prev_avgs = data frame with columns 'mean', 'std.dev', 'sample.size',
##             'std.error' plus the grouping columns
## ...       = bare column names to group by at the coarser level
## Returns one row per group with 'sample.size', 'mean', 'std.dev' and
## 'std.error' (in that order, after the grouping columns), ungrouped
average_up <- function(prev_avgs, ...){
  prev_avgs %>%
    # Rename summary metrics columns so they can be re-computed at this level
    dplyr::rename(prev_std.dev = std.dev,
                  prev_sample.size = sample.size,
                  prev_std.error = std.error,
                  vals = mean) %>%
    # Group by the coarser level's columns
    dplyr::group_by(...) %>%
    # And get averages (and variation metrics) again
    ## When a group holds exactly one finer-level row there is nothing to
    ## average, so that row's mean and variation metrics are carried forward.
    ## The earlier test `all(sample.size) == 1` coerced the count to logical and
    ## was therefore always TRUE, so groups with several rows were never
    ## averaged (only their first unique value was kept).
    dplyr::summarize(
      # Count only non-missing values because mean/sd drop NAs
      sample.size = sum(!is.na(vals)),
      mean = if(dplyr::n() == 1) vals else mean(vals, na.rm = TRUE),
      std.dev = if(dplyr::n() == 1) prev_std.dev else sd(vals, na.rm = TRUE),
      std.error = if(dplyr::n() == 1) prev_std.error else std.dev / sqrt(sample.size),
      .groups = "drop")
}

# Helper: reshape averages so each statistic/variable pair is its own column
## avgs = data frame with 'variables', 'sample.size' and 'mean' through
##        'std.error' columns
## Returns a wide data frame with columns named '<statistic>_<variable>'
widen_stats <- function(avgs){
  avgs %>%
    # Drop sample size column
    dplyr::select(-sample.size) %>%
    # Pivot remaining columns into long format
    tidyr::pivot_longer(cols = mean:std.error,
                        names_to = "stat", values_to = "value") %>%
    # Combine statistic with variable
    dplyr::mutate(name_actual = paste0(stat, "_", variables)) %>%
    # Drop now-superseded columns
    dplyr::select(-stat, -variables) %>%
    # Flip back to wide format
    tidyr::pivot_wider(names_from = name_actual,
                       values_from = value)
}

# We'll loop across each stats-ready file separately to get this done
for(focal_ready in local_files){

  # Starting message
  message("Beginning averaging steps for '", focal_ready, "'")

  # Read in file
  focal_df <- read.csv(file = file.path("data", "stats_ready", focal_ready))

  # Prepare to do averaging steps flexibly
  avgs_prep <- focal_df %>%
    # Move site information columns to left and all together
    dplyr::relocate(lter:raw_filename, site:core, rock_code:generic_soil,
                    .before = dplyr::everything()) %>%
    # Drop core-specific depth/horizon info
    dplyr::select(-dplyr::starts_with("horizon"),
                  -dplyr::starts_with("depth."),
                  -core.length_cm, -bulk.density_g.cm3) %>%
    # Reshape to long format so all number columns are together
    ## (everything outside the relocated 'lter' through 'generic_soil' run)
    tidyr::pivot_longer(cols = -(lter:generic_soil),
                        names_to = "variables", values_to = "vals")

  # Begin with averaging across cores within plots
  plots_v1 <- avgs_prep %>%
    # Group by everything and average the response variables
    dplyr::group_by(lter, dataset_simp, dataset, site, block, plot, variables) %>%
    dplyr::summarize(mean = mean(vals, na.rm = TRUE),
                     std.dev = sd(vals, na.rm = TRUE),
                     # Count only non-missing values because mean/sd drop NAs
                     sample.size = sum(!is.na(vals)),
                     std.error = std.dev / sqrt(sample.size),
                     .groups = "drop")

  # Next, average across plots within blocks (group by everything *except* plot)
  blocks_v1 <- average_up(plots_v1,
                          lter, dataset_simp, dataset, site, block, variables)

  # Finally, average across blocks within sites (everything *except* block)
  sites_v1 <- average_up(blocks_v1,
                         lter, dataset_simp, dataset, site, variables)

  # Tweak 'shape' of plot and site averages for viz/stats use
  plots_v2 <- widen_stats(plots_v1)
  sites_v2 <- widen_stats(sites_v1)

  # Get file names for both plot-level and site-level averages
  site_name <- gsub(pattern = "_stats-ready_", replacement = "_site-avgs_", x = focal_ready)
  plot_name <- gsub(pattern = "_stats-ready_", replacement = "_plot-avgs_", x = focal_ready)

  # Export both locally (missing values are written as empty cells)
  write.csv(x = sites_v2, na = "", row.names = FALSE,
            file = file.path("data", "averages", site_name))
  write.csv(x = plots_v2, na = "", row.names = FALSE,
            file = file.path("data", "averages", plot_name))

} # Closing loop
## ------------------------------------------ ##
# Export Averages to Drive ----
## ------------------------------------------ ##
# Identify all average files in the local 'averages' folder
## (outer parentheses print the result)
( avg_done <- dir(path = file.path("data", "averages")) )

# Loop across these and upload each to the Drive folder
## `overwrite = TRUE` replaces a same-named file already in that folder
for(focal_avg in avg_done){
  googledrive::drive_upload(media = file.path("data", "averages", focal_avg),
                            path = tidy_drive, overwrite = TRUE)
}
# End ----