-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
investigate differences between new AR format and old AR format #8
Comments
We recently learned that the library(tidyverse)
library(pacta.data.preparation)
ar_data_path <- "~/Documents/Data/Asset Resolution/2022-08-15_AR_2021Q4"
ar_advanced_company_indicators_path <- file.path(ar_data_path, "2022-08-24_AR_2021Q4_RMI-Company-Indicators.xlsx")
masterdata_debt_path <- file.path(ar_data_path, "2022-10-05_rmi_masterdata_debt_2021q4.csv")
masterdata_ownership_path <- file.path(ar_data_path, "2022-08-15_rmi_masterdata_ownership_2021q4.csv")
ar_advanced_company_indicators <- import_ar_advanced_company_indicators(ar_advanced_company_indicators_path, fix_names = TRUE)
masterdata_debt <- readr::read_csv(masterdata_debt_path, na = "", show_col_types = FALSE)
masterdata_ownership <- readr::read_csv(masterdata_ownership_path, na = "", show_col_types = FALSE)
# -------------------------------------------------------------------------
direct_ownership_data <-
ar_advanced_company_indicators %>%
filter(consolidation_method == "Direct Ownership") %>%
filter(value_type == "production") %>%
filter(
asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
asset_sector == "Cement" & activity_unit == "t cement" |
asset_sector == "Coal" & activity_unit == "t coal" |
asset_sector == "HDV" & activity_unit == "# vehicles" |
asset_sector == "LDV" & activity_unit == "# vehicles" |
asset_sector == "Oil&Gas" & activity_unit == "GJ" |
asset_sector == "Power" & activity_unit == "MW" |
asset_sector == "Shipping" & activity_unit == "dwt km" |
asset_sector == "Steel" & activity_unit == "t steel"
) %>%
pivot_wider(names_from = "year", values_fill = 0) %>%
group_by(asset_sector) %>%
summarise(new_2016 = sum(`2016`, na.rm = TRUE), new_n = length(unique(company_id)))
masterdata_debt %>%
filter(company_id %in% ar_advanced_company_indicators$company_id) %>%
group_by(sector) %>%
summarise(old_2016 = sum(`_2016`, na.rm = TRUE), old_n = length(unique(company_id))) %>%
full_join(direct_ownership_data, by = c(sector = "asset_sector")) %>%
mutate(
new_2016 = if_else(is.na(new_2016), as.numeric(0), as.numeric(new_2016)),
new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
) %>%
mutate(diff = old_2016 - new_2016) %>%
mutate(percent_diff = round(diff / old_2016 * 100, digits = 2))
#> # A tibble: 9 × 7
#> sector old_2016 old_n new_2016 new_n diff percent_diff
#> <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 Aviation 3.24e12 1302 3.24e12 1302 -1.46e-3 0
#> 2 Cement 2.89e 9 1459 2.73e 9 1448 1.53e+8 5.29
#> 3 Coal 6.31e 9 1349 6.31e 9 1359 0 0
#> 4 HDV 0 283 0 283 0 NaN
#> 5 LDV 9.36e 7 194 9.36e 7 228 0 0
#> 6 Oil&Gas 3.06e11 2864 3.06e11 2864 -2.48e-2 0
#> 7 Power 6.12e 6 29582 6.12e 6 29611 -8.47e-2 0
#> 8 Shipping 1.70e14 9844 1.70e14 9846 -4.38e-1 0
#> 9 Steel 1.66e 9 757 1.66e 9 757 -3.81e-3 0 |
The bespoke library(tidyverse)
library(pacta.data.preparation)
ar_data_path <- "~/Documents/Data/Asset Resolution/2022-08-15_AR_2021Q4"
ar_advanced_company_indicators_path <- file.path(ar_data_path, "2022-08-24_AR_2021Q4_RMI-Company-Indicators.xlsx")
masterdata_debt_path <- file.path(ar_data_path, "2022-10-05_rmi_masterdata_debt_2021q4.csv")
masterdata_ownership_path <- file.path(ar_data_path, "2022-08-15_rmi_masterdata_ownership_2021q4.csv")
ar_advanced_company_indicators <- import_ar_advanced_company_indicators(ar_advanced_company_indicators_path, fix_names = TRUE)
masterdata_debt <- readr::read_csv(masterdata_debt_path, na = "", show_col_types = FALSE)
masterdata_ownership <- readr::read_csv(masterdata_ownership_path, na = "", show_col_types = FALSE)
# -------------------------------------------------------------------------
equity_ownership_data <-
ar_advanced_company_indicators %>%
filter(consolidation_method == "Equity Ownership") %>%
filter(value_type == "production") %>%
filter(
asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
asset_sector == "Cement" & activity_unit == "t cement" |
asset_sector == "Coal" & activity_unit == "t coal" |
asset_sector == "HDV" & activity_unit == "# vehicles" |
asset_sector == "LDV" & activity_unit == "# vehicles" |
asset_sector == "Oil&Gas" & activity_unit == "GJ" |
asset_sector == "Power" & activity_unit == "MW" |
asset_sector == "Shipping" & activity_unit == "dwt km" |
asset_sector == "Steel" & activity_unit == "t steel"
) %>%
pivot_wider(names_from = "year", values_fill = 0) %>%
group_by(asset_sector) %>%
summarise(new_2022 = sum(`2022`, na.rm = TRUE), new_n = length(unique(company_id)))
masterdata_ownership %>%
filter(technology != "Grinding") %>%
group_by(sector) %>%
summarise(old_2022 = sum(`_2022`, na.rm = TRUE), old_n = length(unique(company_id))) %>%
full_join(equity_ownership_data, by = c(sector = "asset_sector")) %>%
mutate(
new_2022 = if_else(is.na(new_2022), as.numeric(0), as.numeric(new_2022)),
new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
) %>%
mutate(diff = old_2022 - new_2022) %>%
mutate(percent_diff = round(diff / old_2022 * 100, digits = 2))
#> # A tibble: 9 × 7
#> sector old_2022 old_n new_2022 new_n diff percent_diff
#> <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 Aviation 1.00e13 1820 1.00e13 1820 1.17e-2 0
#> 2 Cement 4.67e 9 2035 4.67e 9 2035 -6.73e-4 0
#> 3 Coal 1.15e10 2051 1.15e10 2061 -1.07e-2 0
#> 4 HDV 9.22e 6 575 9.22e 6 575 -6.36e-2 0
#> 5 LDV 1.26e 8 394 1.26e 8 435 -3.52e-1 0
#> 6 Oil&Gas 7.82e11 4568 7.82e11 4563 2.14e-2 0
#> 7 Power 1.60e 7 36917 1.60e 7 36875 -4.49e-1 0
#> 8 Shipping 2.69e14 11167 2.69e14 11169 3.06e+9 0
#> 9 Steel 2.75e 9 1105 2.75e 9 1105 -5.96e-3 0
# -------------------------------------------------------------------------
direct_ownership_data <-
ar_advanced_company_indicators %>%
filter(consolidation_method == "Direct Ownership") %>%
filter(value_type == "production") %>%
filter(
asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
asset_sector == "Cement" & activity_unit == "t cement" |
asset_sector == "Coal" & activity_unit == "t coal" |
asset_sector == "HDV" & activity_unit == "# vehicles" |
asset_sector == "LDV" & activity_unit == "# vehicles" |
asset_sector == "Oil&Gas" & activity_unit == "GJ" |
asset_sector == "Power" & activity_unit == "MW" |
asset_sector == "Shipping" & activity_unit == "dwt km" |
asset_sector == "Steel" & activity_unit == "t steel"
) %>%
pivot_wider(names_from = "year", values_fill = 0) %>%
group_by(asset_sector) %>%
summarise(new_2022 = sum(`2022`, na.rm = TRUE), new_n = length(unique(company_id)))
masterdata_debt %>%
filter(technology != "Grinding") %>%
group_by(sector) %>%
summarise(old_2022 = sum(`_2022`, na.rm = TRUE), old_n = length(unique(company_id))) %>%
full_join(direct_ownership_data, by = c(sector = "asset_sector")) %>%
mutate(
new_2022 = if_else(is.na(new_2022), as.numeric(0), as.numeric(new_2022)),
new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
) %>%
mutate(diff = old_2022 - new_2022) %>%
mutate(percent_diff = round(diff / old_2022 * 100, digits = 2))
#> # A tibble: 9 × 7
#> sector old_2022 old_n new_2022 new_n diff percent_diff
#> <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 Aviation 5.35e12 1302 5.35e12 1302 -0.00684 0
#> 2 Cement 2.74e 9 1448 2.74e 9 1448 0 0
#> 3 Coal 6.62e 9 1349 6.62e 9 1359 0 0
#> 4 HDV 3.63e 6 283 3.63e 6 283 0 0
#> 5 LDV 8.56e 7 194 8.56e 7 228 0 0
#> 6 Oil&Gas 3.51e11 2869 3.51e11 2864 0.0242 0
#> 7 Power 7.59e 6 29641 7.59e 6 29611 -0.122 0
#> 8 Shipping 1.79e14 9844 1.79e14 9846 -0.0625 0
#> 9 Steel 1.69e 9 757 1.69e 9 757 -0.00299 0 |
Blocked: Until AI adds a new consolidation method to their advanced company indicators, which likely will happen in Q3 this year Consolidation method is likely to be called "Credit Risk Parent" consolidation method, or something similar. |
Especially with the recent change with AR, we're probably going to eventually need to switch to their new format. We should investigate the differences and see if we can switch to using the new format.
The text was updated successfully, but these errors were encountered: