Skip to content

Commit

Permalink
Resolve merge conflicts to add new var names
Browse files Browse the repository at this point in the history
  • Loading branch information
awunderground committed Apr 10, 2023
2 parents ad5bd1f + 043ebb1 commit 9dde021
Show file tree
Hide file tree
Showing 37 changed files with 402,765 additions and 80,503 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ rfi-applicants-brief.xlsx
rfi-applicants.xlsx

/.quarto/
factsheets/999*
factsheets/998*
92 changes: 92 additions & 0 deletions R/load_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
load_data <- function() {

my_col_type <- cols(
state = col_character(),
county = col_character()
)

county_names <- read_csv(here("mobility-metrics", "old", "01_mobility-metrics.csv")) %>%
select(state, county, state_name, county_name) %>%
distinct()

data_recent <- read_csv(
here("mobility-metrics", "00_mobility-metrics_recent.csv"),
col_type =
cols(
state = col_character(),
county = col_character(),
ratio_black_nh_house_value_households = col_character(),
ratio_hispanic_house_value_households = col_character(),
ratio_other_nh_house_value_households = col_character(),
ratio_white_nh_house_value_households = col_character(),
ratio_population_pc_physician = col_character(),
.default = col_double()
)
) %>%
left_join(county_names, by = c("state", "county")) %>%
mutate(
share_debt_col_ub = 0,
share_debt_col_lb = 1,

digital_access = 0.1234,
digital_access_quality = 3

) %>%
prep_data()

data_years <- read_csv(
here("mobility-metrics","00_mobility-metrics_longitudinal.csv"),
col_type = cols(
state = col_character(),
county = col_character(),
ratio_black_nh_house_value_households = col_character(),
ratio_hispanic_house_value_households = col_character(),
ratio_other_nh_house_value_households = col_character(),
ratio_white_nh_house_value_households = col_character(),
ratio_population_pc_physician = col_character(),
.default = col_double()
)
) %>%
mutate(
election_turnout = 0.1234,
election_turnout_quality = 3,

digital_access = 0.1234,
digital_access_quality = 3

) %>%
left_join(county_names, by = c("state", "county")) %>%
prep_data()

data_race_ethnicity <- read_csv(
here("mobility-metrics", "01_mobility-metrics_race-ethnicity_recent.csv"),
col_types = my_col_type
) %>%
left_join(county_names, by = c("state", "county")) %>%
prep_data()

data_race <- read_csv(
here("mobility-metrics", "02_mobility-metrics_race_recent.csv"),
col_types = my_col_type
) %>%
left_join(county_names, by = c("state", "county")) %>%
prep_data()

data_race_share <- read_csv(
here("mobility-metrics", "03_mobility-metrics_race-share_recent.csv"),
col_types = my_col_type
) %>%
left_join(county_names, by = c("state", "county")) %>%
prep_data()

data_list <- list(
recent = data_recent,
years = data_years,
race_ethnicity = data_race_ethnicity,
race = data_race,
race_share = data_race_share
)

return(data_list)

}
66 changes: 45 additions & 21 deletions R/prep_data.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,39 @@
prep_data <- function(data) {

# Get a list of all variables that should be presented as percentage
perc_vars <- c(
"share_debt_col", # % with debt
"share_homeless", # share homeless
"share_election_turnout", # % voting
"rate_low_birth_weight", # % low birthweight
"share_white_nh_exposure",
"share_black_nh_exposure",
"share_hispanic_exposure",
"share_other_nh_exposure",
"share_poverty_exposure", # % in high poverty
"meps20_total", # student poverty concentration
"meps20_white",
"meps20_black",
"meps20_hispanic",
"share_in_preschool", # % in pre k
"share_hs_degree", # % with HS degree
"share_employed"
)


data <- data %>%
mutate(state = str_pad(state, width = 2, side ="left", pad = "0"), #set max ub as 1
county = str_pad(county, width = 3, side ="left", pad = "0"))
mutate(
state = str_pad(state, width = 2, side ="left", pad = "0"),
county = str_pad(county, width = 3, side ="left", pad = "0"),
fips = paste0(state, county)
)

data <- data %>%
unite("fips", state, county, sep = "", remove = FALSE) %>%
# group_by(full_fips) %>%
# fill(state_name, .direction = "updown") %>%
# fill(county_name, .direction = "updown") %>%
# ungroup() %>%
unite("state_county", county_name, state_name, sep = ", ", remove = FALSE) %>%
filter(fips %in% params$state_county) %>%
arrange(factor(fips, levels = params$state_county)) %>%
unite("state_county", county_name, state_name, sep = ", ", remove = FALSE) %>%
mutate(state_county = gsub("County", "", state_county)) %>%
mutate(state_county = rm_white_comma(state_county))
mutate(state_county = qdapRegex::rm_white_comma(state_county))

data <- data %>%
mutate(state_county = str_to_title(state_county))
Expand All @@ -28,18 +47,16 @@ prep_data <- function(data) {
# filter to get the ones only existing in the data
perc_vars_in_data <- all_perc_vars[(all_perc_vars %in% colnames(data))]


numeric_vars_one_digit <- data %>%
select(
-matches("average_to_living_wage_ratio"),
-matches("ratio_average_to_living_wage"),
-matches("share_desc_rep"),
-fips,
-matches("_quality"),
-matches("year"),
-starts_with("learning_rate"),
-starts_with("rate_learning"),
-all_of(perc_vars_in_data),
-starts_with("share_burdened"),
-starts_with("pctl"),
-any_of(c("asian_other_pop", "black_nonhispanic_pop", "hispanic_pop", "white_nonhispanic_pop"))
) %>%
select_if(is.numeric) %>%
names()
Expand All @@ -48,30 +65,37 @@ prep_data <- function(data) {
data <- data %>%
mutate_at(vars(ends_with("_quality")),
function(x) recode(x, `1` = "Strong", `2` = "Marginal", `3` = "Weak")) %>%
mutate(
across(
any_of(c("rate_learning", "rate_learning_lb", "rate_learning_ub")),
.fns = ~round(.x, digits = 2)
)
) %>%
# multiple affordable housing variables by 100
mutate_at(vars(matches("share_affordable_")),
mutate_at(vars(matches("share_affordable_[1-9]")),
function(x) x*100) %>%
# update ub for share_hs_degree to 1
mutate_at(vars(matches("share_hs_degree_ub")),
function(x) case_when(x > 1 ~ 1, TRUE ~ x)) %>%
mutate_at(vars(any_of(c("asian_other_pop", "black_nonhispanic_pop", "hispanic_pop", "white_nonhispanic_pop"))),
mutate_at(vars(any_of(c("share_desc_rep_asian_other", "share_desc_rep_black_nonhispanic", "share_desc_rep_white_nonhispanic"))),
function(x) paste0("__:", scales::percent(x))) %>%
# convert to percentage
mutate_at(perc_vars_in_data,
function (x) scales::percent(x, accuracy = 0.1)) %>%
mutate_at(numeric_vars_one_digit,
function(x) {format(round(x, 1), big.mark=",", scientific=FALSE)}) %>%
mutate_at(vars(matches("average_to_living_wage_ratio")), funs(as.numeric)) %>%
mutate_at(vars(matches("average_to_living_wage_ratio")),
mutate_at(vars(matches("ratio_average_to_living_wage")), funs(as.numeric)) %>%
mutate_at(vars(matches("ratio_average_to_living_wage")),
function(x) {format(round(x, 2), big.mark=",",scientific=FALSE)}) %>%
# mutate(na_quality = "NA") %>% #to be used for metrics where there is no quality variable %>%
mutate_at(vars(matches("pctl"), -ends_with("_quality")),
function(x) scales::dollar(x, accuracy = 1)) %>%
# remove whitespace in values
mutate_at(vars(-one_of(c("state_county", "subgroup"))),
function(x) gsub("\\s+", "", x)) %>%
mutate_at(vars(matches("crime_rate"), -ends_with("_quality")),
function(x) gsub("\\..*","", x))
mutate_at(vars(matches("crime_violent_rate"), matches("crime_property_rate"), -ends_with("_quality")),
function(x) gsub("\\..*","", x))



return(data)

Expand Down
2 changes: 1 addition & 1 deletion R/prep_pages.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ prep_pages <- function(url, output_directory, state_title = FALSE, bespoke = TRU
app_list <- read_csv(url)

# format app list
app_list <- app_list %>%
app_list <- app_list %>%
mutate(fips = stringr::str_pad(fips, width = 5, side = 'left', pad = '0')) %>%
mutate(full_list = case_when(!is.na(comparisons) ~ stringr::str_c(fips, comparisons, sep = ';'),
TRUE ~ str_c(fips, sep = ';'))) %>%
Expand Down
2 changes: 1 addition & 1 deletion R/quarto_render_wrapper.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#' The function is not intended to return anything, but it writes htmls to the
#' output_file path

quarto_render_wrapper <- function(input, output_file, execute_params, dir_name){
quarto_render_wrapper <- function(input, output_file, execute_params, dir_name) {

# create directory
if(!dir.exists(dir_name)){
Expand Down
Loading

0 comments on commit 9dde021

Please sign in to comment.