From 6eb6b50fb339c8c22c9fe0be95364f1a23abe9b2 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Wed, 13 Mar 2024 13:25:41 +0000 Subject: [PATCH 01/38] import council RUC classification --- .../import_council_rural_classification.py | 63 +++++++++++++++++++ hub/management/commands/import_ruc_data.py | 14 +---- 2 files changed, 66 insertions(+), 11 deletions(-) create mode 100644 hub/management/commands/import_council_rural_classification.py diff --git a/hub/management/commands/import_council_rural_classification.py b/hub/management/commands/import_council_rural_classification.py new file mode 100644 index 000000000..0641a784e --- /dev/null +++ b/hub/management/commands/import_council_rural_classification.py @@ -0,0 +1,63 @@ +import pandas as pd +from mysoc_dataset import get_dataset_url + +from hub.import_utils import add_gss_codes, filter_authority_type +from hub.models import DataSet + +from .base_importers import BaseImportFromDataFrameCommand, MultipleAreaTypesMixin + + +class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): + help = "Import Council RUC data" + + cons_row = "gss_code" + message = "Importing council rural/urban classification" + uses_gss = True + do_not_convert = True + + area_types = ["STC", "DIS"] + + defaults = { + "label": "Rural Urban Classification", + "description": "A composite measure of ‘ruralness’ (based on population density, settlement size, and drive times) standardised across the countries of the UK.", + "data_type": "text", + "category": "place", + "source_label": "Data from ONS (England and Wales), NRS (Scotland), and NISRA (Northern Ireland), collated and standardised by mySociety.", + "source": "https://data.mysociety.org/datasets/uk_ruc_uk_ruc/", + "source_type": "csv", + "table": "areadata", + "data_url": "https://pages.mysociety.org/uk_ruc/downloads/uk-ruc-la-ruc-csv/latest", + "comparators": DataSet.in_comparators(), + "is_filterable": True, + "is_shadable": True, + "is_public": True, + "options": [ + dict(title="Sparse and rural", shader="lightgreen"), + dict(title="Urban with rural areas", shader="lightgrey"), + dict(title="Rural", shader="green"), + dict(title="Urban", shader="grey"), + ], + "unit_type": "raw", + "unit_distribution": "physical_area", + } + + data_sets = { + "constituency_ruc": { + "defaults": defaults, + "col": "ruc-cluster-label", + }, + } + + def get_dataframe(self): + url = get_dataset_url( + repo_name="uk_ruc", + package_name="uk_ruc", + version_name="latest", + file_name="la_ruc.csv", + done_survey=True, + ) + df = pd.read_csv(url) + df = add_gss_codes(df, "local-authority-code") + df = filter_authority_type(df, self.area_type, "gss_code") + + return df diff --git a/hub/management/commands/import_ruc_data.py b/hub/management/commands/import_ruc_data.py index 63c054091..1f99b7766 100644 --- a/hub/management/commands/import_ruc_data.py +++ b/hub/management/commands/import_ruc_data.py @@ -11,10 +11,11 @@ class Command(BaseImportFromDataFrameCommand): cons_row = "gss-code" message = "Importing constituency rural urban classification data" + do_not_convert = True data_sets = { "constituency_ruc": { "defaults": { - "label": "Urban Rural Classification", + "label": "Rural Urban Classification", "description": "A composite measure of 'ruralness' (based population density, settlement size, and drive times) standardised across the countries of the UK.", "data_type": "text", "category": "place", @@ -39,7 +40,7 @@ class Command(BaseImportFromDataFrameCommand): package = { "repo_name": "uk_ruc", "package_name": 
"uk_ruc", - "version_name": "2.0.0", + "version_name": "latest", "file_name": "pcon_ruc.csv", } @@ -58,12 +59,3 @@ def __init__(self): def get_dataframe(self): return pd.read_csv(self.data_sets["constituency_ruc"]["defaults"]["data_url"]) - - def update_averages(self): - pass - - def update_max_min(self): - pass - - def convert_to_new_con(self): - pass From 0162ca760bf031dfd289018ed9203bef8a6f0a74 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Wed, 13 Mar 2024 14:38:37 +0000 Subject: [PATCH 02/38] imports for council climate emergency declarations and net zero targets --- .../import_council_carbon_neutral_data.py | 69 ++++++++++++++++++ .../import_council_emergency_declaration.py | 71 +++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 hub/management/commands/import_council_carbon_neutral_data.py create mode 100644 hub/management/commands/import_council_emergency_declaration.py diff --git a/hub/management/commands/import_council_carbon_neutral_data.py b/hub/management/commands/import_council_carbon_neutral_data.py new file mode 100644 index 000000000..4c527dc86 --- /dev/null +++ b/hub/management/commands/import_council_carbon_neutral_data.py @@ -0,0 +1,69 @@ +import pandas as pd +from mysoc_dataset import get_dataset_url + +from hub.models import Area, AreaData, DataSet + +from .base_importers import BaseImportFromDataFrameCommand, MultipleAreaTypesMixin + +declare_map = { + "Y": "Yes", + "N": "No", +} + + +class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): + cons_row = "gss_code" + message = "Importing council net zero target data" + uses_gss = True + do_not_convert = True + + area_types = ["STC", "DIS"] + + defaults = { + "label": "Council Net Zero target date", + "data_type": "integer", + "category": "place", + "source_label": "Data from mySociety", + "source": "https://pages.mysociety.org/la-plans-promises/", + "source_type": "csv", + "table": "areadata", + "data_url": "https://pages.mysociety.org/la-plans-promises/downloads/local-authority-climate-emergency-declarations-declarations-csv/latest", + "comparators": DataSet.numerical_comparators(), + "is_filterable": True, + "is_shadable": True, + "is_public": True, + "unit_type": "raw", + "unit_distribution": "physical_area", + } + + data_sets = { + "council_net_zero_date": { + "defaults": defaults, + "col": "year", + }, + } + + def get_dataframe(self): + url = get_dataset_url( + repo_name="la-plans-promises", + package_name="local_authority_net_zero_commitments", + version_name="latest", + file_name="promises.csv", + done_survey=True, + ) + df = pd.read_csv(url) + + councils = [] + for index, row in df.iterrows(): + if pd.isna(row["target"]): + continue + councils.append( + { + "gss_code": row["gss_code"], + "year": row["target"], + } + ) + + df = pd.DataFrame(councils) + + return df diff --git a/hub/management/commands/import_council_emergency_declaration.py b/hub/management/commands/import_council_emergency_declaration.py new file mode 100644 index 000000000..e827ed4c4 --- /dev/null +++ b/hub/management/commands/import_council_emergency_declaration.py @@ -0,0 +1,71 @@ +import pandas as pd +from mysoc_dataset import get_dataset_url + +from hub.models import DataSet + +from .base_importers import BaseImportFromDataFrameCommand, MultipleAreaTypesMixin + +declare_map = { + "Y": "Yes", + "N": "No", +} + + +class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): + cons_row = "gss_code" + message = "Importing council climate emergency declarations" + uses_gss = True + 
do_not_convert = True + + area_types = ["STC", "DIS"] + + defaults = { + "label": "Council has declared a climate emergency", + "data_type": "text", + "category": "place", + "source_label": "Data from mySociety.", + "source": "https://pages.mysociety.org/la-plans-promises/", + "source_type": "csv", + "table": "areadata", + "data_url": "https://pages.mysociety.org/la-plans-promises/downloads/local-authority-climate-emergency-declarations-declarations-csv/latest", + "comparators": DataSet.in_comparators(), + "is_filterable": True, + "is_shadable": True, + "is_public": True, + "unit_type": "raw", + "unit_distribution": "physical_area", + "options": [ + {"title": "Yes", "shader": "#068670"}, + {"title": "No", "shader": "#DEE2E6"}, + ], + } + + data_sets = { + "council_emergency_declaration": { + "defaults": defaults, + "col": "declared", + }, + } + + def get_dataframe(self): + url = get_dataset_url( + repo_name="la-plans-promises", + package_name="local_authority_climate_emergency_declarations", + version_name="latest", + file_name="declarations.csv", + done_survey=True, + ) + df = pd.read_csv(url) + + councils = [] + for index, row in df.iterrows(): + councils.append( + { + "gss_code": row["gss_code"], + "declared": declare_map.get(row["made_declaration"], "No"), + } + ) + + df = pd.DataFrame(councils) + + return df From 98ade8ccc46f905ba05593ea081d05da0a2dd253 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Wed, 13 Mar 2024 18:06:40 +0000 Subject: [PATCH 03/38] add some useful import functions --- hub/import_utils.py | 54 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 hub/import_utils.py diff --git a/hub/import_utils.py b/hub/import_utils.py new file mode 100644 index 000000000..7776e5956 --- /dev/null +++ b/hub/import_utils.py @@ -0,0 +1,54 @@ +from functools import lru_cache + +import pandas as pd +from mysoc_dataset import get_dataset_url + + +@lru_cache +def get_authority_mapping() -> pd.DataFrame: + """ + Return a dataframe mapping different names to authority code + """ + url = get_dataset_url( + repo_name="uk_local_authority_names_and_codes", + package_name="uk_la_future", + version_name="1", + file_name="lookup_name_to_registry.csv", + done_survey=True, + ) + return pd.read_csv(url) + + +@lru_cache +def get_council_df(): + """ + Return a dataframe of councils that are live or historical as of a given date + """ + url = get_dataset_url( + repo_name="uk_local_authority_names_and_codes", + package_name="uk_la_future", + version_name="1", + file_name="uk_local_authorities_future.csv", + done_survey=True, + ) + return pd.read_csv(url) + + +def add_gss_codes(df: pd.DataFrame, code_column: str): + """ + Given a DataFrame with a column called "authority_code", add a column called "gss_code" + """ + authority_df = get_council_df() + + rows = len(df[code_column]) + df["gss_code"] = pd.Series([None] * rows, index=df.index) + + for index, row in df.iterrows(): + authority_code = row[code_column] + if not pd.isnull(authority_code): + authority_match = authority_df[ + authority_df["local-authority-code"] == authority_code + ] + df.at[index, "gss_code"] = authority_match["gss-code"].values[0] + + return df From 721dfcc595540dc89bfa2b4aabe34af7067bf524 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Wed, 13 Mar 2024 18:06:53 +0000 Subject: [PATCH 04/38] imports for council emissions data total emissions for council emissions cluster --- .../import_council_emission_cluster.py | 77 +++++++++++++++++++ .../import_council_emissions_totals.py | 70 
+++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 hub/management/commands/import_council_emission_cluster.py create mode 100644 hub/management/commands/import_council_emissions_totals.py diff --git a/hub/management/commands/import_council_emission_cluster.py b/hub/management/commands/import_council_emission_cluster.py new file mode 100644 index 000000000..8802d2ee4 --- /dev/null +++ b/hub/management/commands/import_council_emission_cluster.py @@ -0,0 +1,77 @@ +import pandas as pd +from mysoc_dataset import get_dataset_url + +from hub.import_utils import add_gss_codes +from hub.models import DataSet + +from .base_importers import BaseImportFromDataFrameCommand, MultipleAreaTypesMixin + +shaders = [ + {"title": "Agriculture", "shader": "green"}, + {"title": "City of London", "shader": "lightgrey"}, + {"title": "Industry/Commerical/Domestic", "shader": "darkblue"}, + {"title": "Public sector", "shader": "lightblue"}, + {"title": "Transport/Domestic", "shader": "lightpurple"}, + {"title": "Urban Mainstream", "shader": "darkgrey"}, +] + + +class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): + cons_row = "gss_code" + message = "Importing council emissions data" + uses_gss = True + do_not_convert = True + + area_types = ["STC", "DIS"] + + defaults = { + "label": "Emissions profile", + "data_type": "text", + "category": "place", + "description": "Labels local authorities based on the profile of their 2020 emissions to enable authorities with similar patterns of emissions to be compared.", + "release_date": "January 2022", + "source_label": "Data from the Department of Business, Energy & Industrial Strategy collated and analysed by mySociety.", + "source": "https://pages.mysociety.org/la-emissions-data/", + "source_type": "csv", + "table": "areadata", + "data_url": "https://pages.mysociety.org/la-emissions-data/datasets/uk_local_authority_emissions_data/latest", + "comparators": DataSet.in_comparators(), + "options": shaders, + "is_filterable": True, + "is_shadable": True, + "is_public": True, + "unit_type": "raw", + "unit_distribution": "physical_area", + } + + data_sets = { + "council_emissions_label": { + "defaults": defaults, + "col": "label", + }, + } + + def get_dataframe(self): + url = get_dataset_url( + repo_name="la-emissions-data", + package_name="uk_local_authority_emissions_data", + version_name="latest", + file_name="la_labels.csv", + done_survey=True, + ) + df = pd.read_csv(url) + + df = add_gss_codes(df, "local-authority-code") + + councils = [] + for index, row in df.iterrows(): + councils.append( + { + "gss_code": row["gss_code"], + "label": row["label"], + } + ) + + df = pd.DataFrame(councils) + + return df diff --git a/hub/management/commands/import_council_emissions_totals.py b/hub/management/commands/import_council_emissions_totals.py new file mode 100644 index 000000000..5caaa74ee --- /dev/null +++ b/hub/management/commands/import_council_emissions_totals.py @@ -0,0 +1,70 @@ +import pandas as pd +from mysoc_dataset import get_dataset_url + +from hub.models import DataSet + +from .base_importers import BaseImportFromDataFrameCommand, MultipleAreaTypesMixin + +declare_map = { + "Y": "Yes", + "N": "No", +} + + +class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): + cons_row = "gss_code" + message = "Importing council emissions data" + uses_gss = True + do_not_convert = True + + area_types = ["STC", "DIS"] + + defaults = { + "label": "Total emissions (ktCO2)", + "description": "Estimated 2020 carbon dioxide emissions 
within the scope of influence of local authorities.", + "data_type": "integer", + "category": "place", + "release_date": "2020", + "source_label": "Data from the Department of Business, Energy & Industrial Strategy collated by mySociety.", + "source": "https://pages.mysociety.org/la-emissions-data/", + "source_type": "csv", + "table": "areadata", + "data_url": "https://pages.mysociety.org/la-emissions-data/datasets/uk_local_authority_emissions_data/latest", + "comparators": DataSet.numerical_comparators(), + "is_filterable": True, + "is_shadable": True, + "is_public": True, + "unit_type": "raw", + "unit_distribution": "physical_area", + } + + data_sets = { + "council_total_emissions": { + "defaults": defaults, + "col": "emissions", + }, + } + + def get_dataframe(self): + url = get_dataset_url( + repo_name="la-emissions-data", + package_name="uk_local_authority_emissions_data", + version_name="latest", + file_name="local_authority_emissions.csv", + done_survey=True, + ) + df = pd.read_csv(url) + df = df.loc[df["Year"] == 2020] + + councils = [] + for index, row in df.iterrows(): + councils.append( + { + "gss_code": row["gss_code"], + "emissions": row["Total Emissions:kt CO2"], + } + ) + + df = pd.DataFrame(councils) + + return df From 56e867b64f0cc62e0b0cbb6df3177dd8af5b6ea2 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Thu, 14 Mar 2024 17:15:22 +0000 Subject: [PATCH 05/38] add council type filter function to import utils filters councils based on the area type to avoid many missing councils messages --- hub/import_utils.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/hub/import_utils.py b/hub/import_utils.py index 7776e5956..a9b18cb5d 100644 --- a/hub/import_utils.py +++ b/hub/import_utils.py @@ -3,6 +3,8 @@ import pandas as pd from mysoc_dataset import get_dataset_url +council_types = {"STC": ["CTY", "LBO", "MD", "SCO", "NID", "UA", "WPA"], "DIS": ["NMD"]} + @lru_cache def get_authority_mapping() -> pd.DataFrame: @@ -52,3 +54,22 @@ def add_gss_codes(df: pd.DataFrame, code_column: str): df.at[index, "gss_code"] = authority_match["gss-code"].values[0] return df + + +def _filter_authority_type(df: pd.DataFrame, types: list): + authority_df = get_council_df() + + rows = len(df["gss_code"]) + df["type"] = pd.Series([None] * rows, index=df.index) + for index, row in df.iterrows(): + if not pd.isnull("gss_code"): + authority_match = authority_df[authority_df["gss-code"] == row["gss_code"]] + df.at[index, "type"] = authority_match["local-authority-type"].values[0] + + df = df.loc[df["type"].isin(types)] + + return df + + +def filter_authority_type(df: pd.DataFrame, authority_type: str): + return _filter_authority_type(df, council_types[authority_type]) From dfc967895db74c2dc3aa64ad71bbc7177e771930 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Tue, 19 Mar 2024 14:39:33 +0000 Subject: [PATCH 06/38] update council filter to only return current authorities --- hub/import_utils.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/hub/import_utils.py b/hub/import_utils.py index a9b18cb5d..01c0c927e 100644 --- a/hub/import_utils.py +++ b/hub/import_utils.py @@ -1,3 +1,4 @@ +from datetime import date from functools import lru_cache import pandas as pd @@ -56,20 +57,38 @@ def add_gss_codes(df: pd.DataFrame, code_column: str): return df -def _filter_authority_type(df: pd.DataFrame, types: list): +def _filter_authority_type(df: pd.DataFrame, types: list, gss_code: str): authority_df = get_council_df() - rows = 
len(df["gss_code"]) + today = date.today() + + rows = len(df[gss_code]) df["type"] = pd.Series([None] * rows, index=df.index) + df["start-date"] = pd.Series([None] * rows, index=df.index) + df["end-date"] = pd.Series([None] * rows, index=df.index) for index, row in df.iterrows(): - if not pd.isnull("gss_code"): - authority_match = authority_df[authority_df["gss-code"] == row["gss_code"]] + if not pd.isnull(row[gss_code]): + authority_match = authority_df[authority_df["gss-code"] == row[gss_code]] df.at[index, "type"] = authority_match["local-authority-type"].values[0] + df.at[index, "start-date"] = pd.to_datetime( + authority_match["start-date"].values[0] + ).date() + df.at[index, "end-date"] = pd.to_datetime( + authority_match["end-date"].values[0] + ).date() df = df.loc[df["type"].isin(types)] + # only select authorities with a start date in the past + df = df.loc[(df["start-date"] < today) | df["start-date"].isna()] + + # only select authorities with an end date in the future + df = df.loc[(df["end-date"] > today) | df["end-date"].isna()] + return df -def filter_authority_type(df: pd.DataFrame, authority_type: str): - return _filter_authority_type(df, council_types[authority_type]) +def filter_authority_type( + df: pd.DataFrame, authority_type: str, gss_code: str = "gss-code" +): + return _filter_authority_type(df, council_types[authority_type], gss_code) From 5aafd1cb3a30625649559ce36e57b8d491f90d70 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Tue, 19 Mar 2024 14:40:17 +0000 Subject: [PATCH 07/38] update council imports to filter out types not being imported --- hub/management/commands/import_council_carbon_neutral_data.py | 4 +++- hub/management/commands/import_council_data.py | 2 ++ hub/management/commands/import_council_emission_cluster.py | 3 ++- hub/management/commands/import_council_emissions_totals.py | 3 +++ 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hub/management/commands/import_council_carbon_neutral_data.py b/hub/management/commands/import_council_carbon_neutral_data.py index 4c527dc86..e642b9ff1 100644 --- a/hub/management/commands/import_council_carbon_neutral_data.py +++ b/hub/management/commands/import_council_carbon_neutral_data.py @@ -1,7 +1,8 @@ import pandas as pd from mysoc_dataset import get_dataset_url -from hub.models import Area, AreaData, DataSet +from hub.import_utils import filter_authority_type +from hub.models import DataSet from .base_importers import BaseImportFromDataFrameCommand, MultipleAreaTypesMixin @@ -52,6 +53,7 @@ def get_dataframe(self): done_survey=True, ) df = pd.read_csv(url) + df = filter_authority_type(df, self.area_type, self.cons_row) councils = [] for index, row in df.iterrows(): diff --git a/hub/management/commands/import_council_data.py b/hub/management/commands/import_council_data.py index ffc105adf..698bd4ba5 100644 --- a/hub/management/commands/import_council_data.py +++ b/hub/management/commands/import_council_data.py @@ -1,6 +1,7 @@ import pandas as pd from mysoc_dataset import get_dataset_url +from hub.import_utils import filter_authority_type from hub.models import DataSet from .base_importers import BaseImportFromDataFrameCommand, MultipleAreaTypesMixin @@ -48,5 +49,6 @@ def get_dataframe(self): done_survey=True, ) df = pd.read_csv(url) + df = filter_authority_type(df, self.area_type, self.cons_row) return df diff --git a/hub/management/commands/import_council_emission_cluster.py b/hub/management/commands/import_council_emission_cluster.py index 8802d2ee4..115467c85 100644 --- 
a/hub/management/commands/import_council_emission_cluster.py +++ b/hub/management/commands/import_council_emission_cluster.py @@ -1,7 +1,7 @@ import pandas as pd from mysoc_dataset import get_dataset_url -from hub.import_utils import add_gss_codes +from hub.import_utils import add_gss_codes, filter_authority_type from hub.models import DataSet from .base_importers import BaseImportFromDataFrameCommand, MultipleAreaTypesMixin @@ -62,6 +62,7 @@ def get_dataframe(self): df = pd.read_csv(url) df = add_gss_codes(df, "local-authority-code") + df = filter_authority_type(df, self.area_type, self.cons_row) councils = [] for index, row in df.iterrows(): diff --git a/hub/management/commands/import_council_emissions_totals.py b/hub/management/commands/import_council_emissions_totals.py index 5caaa74ee..1a55e77a3 100644 --- a/hub/management/commands/import_council_emissions_totals.py +++ b/hub/management/commands/import_council_emissions_totals.py @@ -1,6 +1,7 @@ import pandas as pd from mysoc_dataset import get_dataset_url +from hub.import_utils import add_gss_codes, filter_authority_type from hub.models import DataSet from .base_importers import BaseImportFromDataFrameCommand, MultipleAreaTypesMixin @@ -55,6 +56,8 @@ def get_dataframe(self): ) df = pd.read_csv(url) df = df.loc[df["Year"] == 2020] + df = add_gss_codes(df, "local-authority-code") + df = filter_authority_type(df, self.area_type, "gss_code") councils = [] for index, row in df.iterrows(): From 63256a13ca8036d23be712d1f7934b338404cbfc Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Wed, 20 Mar 2024 14:32:25 +0000 Subject: [PATCH 08/38] import if council has a climate action plan --- .../commands/import_council_has_plan.py | 65 +++++++++++++++++++ hub/static/js/explore.esm.js | 2 +- 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 hub/management/commands/import_council_has_plan.py diff --git a/hub/management/commands/import_council_has_plan.py b/hub/management/commands/import_council_has_plan.py new file mode 100644 index 000000000..53b3d1d1c --- /dev/null +++ b/hub/management/commands/import_council_has_plan.py @@ -0,0 +1,65 @@ +import pandas as pd +import requests + +from hub.import_utils import filter_authority_type +from hub.models import DataSet + +from .base_importers import BaseImportFromDataFrameCommand, MultipleAreaTypesMixin + + +class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): + cons_row = "gss_code" + message = "Importing council climate emergency declarations" + uses_gss = True + do_not_convert = True + + area_types = ["STC", "DIS"] + + defaults = { + "label": "Council has a Climate Action Plan", + "data_type": "text", + "category": "place", + "source_label": "Data from mySociety and Climate Emergency UK.", + "source": "http://cape.mysociety.org/", + "source_type": "csv", + "table": "areadata", + "data_url": "https://cape.mysociety.org/api/councils/", + "comparators": DataSet.in_comparators(), + "is_filterable": True, + "is_shadable": True, + "is_public": True, + "unit_type": "raw", + "unit_distribution": "physical_area", + "options": [ + {"title": "Yes", "shader": "#068670"}, + {"title": "No", "shader": "#DEE2E6"}, + ], + } + + data_sets = { + "council_has_plan": { + "defaults": defaults, + "col": "has_plan", + }, + } + + def get_dataframe(self): + results = requests.get("https://cape.mysociety.org/api/councils/") + data = results.json() + + councils = [] + for row in data: + has_plan = "No" + if row["plan_count"] > 0: + has_plan = "Yes" + councils.append( + { + "gss_code": 
row["gss_code"], + "has_plan": has_plan, + } + ) + + df = pd.DataFrame(councils) + df = filter_authority_type(df, self.area_type, self.cons_row) + + return df diff --git a/hub/static/js/explore.esm.js b/hub/static/js/explore.esm.js index d183583f5..eaad2244a 100644 --- a/hub/static/js/explore.esm.js +++ b/hub/static/js/explore.esm.js @@ -618,7 +618,7 @@ const app = createApp({ case 'filter': return dataset.is_filterable case 'shader': - return ["party", "constituency_ruc", "council_type"].includes(dataset.name) || !["text", "json", "date", "profile_id"].includes(dataset.data_type) && dataset.is_shadable + return ["party", "constituency_ruc", "council_type"].includes(dataset.name) || !["json", "date", "profile_id"].includes(dataset.data_type) && dataset.is_shadable default: return true } From ac30751f05a12e29b8a588329d31348bc114ed7b Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Thu, 21 Mar 2024 12:04:05 +0000 Subject: [PATCH 09/38] update area page to allow related categories for place Also tidies up some of the formatting of averages for integer categories and enables integers to be dates (mostly for years) that do not get commas in them. --- hub/templates/hub/area.html | 19 ++++++++++--- hub/views/area.py | 57 +++++++++++++++++++++++-------------- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/hub/templates/hub/area.html b/hub/templates/hub/area.html index 025107acc..de202b37d 100644 --- a/hub/templates/hub/area.html +++ b/hub/templates/hub/area.html @@ -497,9 +497,20 @@

        {{ dataset.label }}
        {{ dataset.data.value.link_text|safe }}
      {% elif dataset.data_type == "integer" %}
-       {{ dataset.data.value|intcomma }}
-       {% if data.data.average %}
-       {{dataset.data.average|intcomma }} national average
+       {% if dataset.subcategory == "date" %}
+       {{ dataset.data.value }}
+       {% else %}
+       {{ dataset.data.value|intcomma }}
+       {% endif %}
+       {% if dataset.data.average %}
+       {% if dataset.subcategory == "date" %}
+       {{dataset.data.average|floatformat:"0" }} national average
+       {% else %}
+       {{dataset.data.average|floatformat:"-1g" }} national average
+       {% endif %}
+       {% endif %}
+       {% if dataset.related_category %}
+       {% include 'hub/area/_json_data.html' with dataset=dataset.related_category %}
        {% endif %}
      {% elif dataset.is_range and dataset.data|length > 0 %}
@@ -609,7 +620,7 @@
        {{ dataset.label }}
      {% else %}
        {{ dataset.data.value|intcomma }}
-       {{dataset.data.average|intcomma }} national average
+       {{dataset.data.average|floatformat:"-1g" }} national average
      {% endif %}
    {% elif dataset.is_range %}
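The filter change above matters for year-style integers: intcomma would render a 2035 net zero target year as "2,035", while floatformat:"0" renders the rounded value without digit grouping. A minimal standalone sketch of the difference (assuming Django is installed; not part of this patch):

    # Sketch only: shows why "date" subcategories avoid intcomma.
    import django
    from django.conf import settings
    from django.template import Context, Template

    settings.configure(
        INSTALLED_APPS=["django.contrib.humanize"],
        TEMPLATES=[{"BACKEND": "django.template.backends.django.DjangoTemplates"}],
    )
    django.setup()

    tpl = Template("{% load humanize %}{{ value|intcomma }} / {{ value|floatformat:'0' }}")
    print(tpl.render(Context({"value": 2035})))  # -> "2,035 / 2035"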
diff --git a/hub/views/area.py b/hub/views/area.py index 072351e2d..c541614d0 100644 --- a/hub/views/area.py +++ b/hub/views/area.py @@ -180,6 +180,21 @@ def get_overlap_info(self, **kwargs): overlap_constituencies[0]["unchanged"] = True return overlap_constituencies + def get_area_country(self, indexed_categories): + country = None + if indexed_categories.get("country", None) is not None: + try: + country = indexed_categories["country"]["data"].value() + except (ValueError, KeyError): + country = None + elif indexed_categories.get("council_country", None) is not None: + try: + country = indexed_categories["council_country"]["data"].value() + except (ValueError, KeyError): + country = None + + return country + def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) @@ -332,33 +347,33 @@ def get_context_data(self, **kwargs): "constituency_foe_group_count": "constituency_foe_groups", "power_postcodes_count": "power_postcodes", "tcc_open_letter_signatories_count": "tcc_open_letter_signatories", + "council_net_zero_date": "council_net_zero_details", } context["is_related_category"] = context["related_categories"].values() categories_to_remove = defaultdict(list) - try: - context["country"] = indexed_categories["country"]["data"].value() - except (ValueError, KeyError): - context["country"] = None - - if context["country"] is not None: - for category, items in categories.items(): - for data_set in items: - if ( - context["related_categories"].get(data_set["db_name"], None) - is not None - ): - data_item = indexed_categories[ - context["related_categories"][data_set["db_name"]] - ] - if len(data_item) > 0: - data_set["related_category"] = data_item - categories_to_remove["movement"].append(data_item) - - if context["country"] in data_set["excluded_countries"]: - categories_to_remove[category].append(data_set) + area_country = self.get_area_country(indexed_categories) + + for category, items in categories.items(): + for data_set in items: + if ( + context["related_categories"].get(data_set["db_name"], None) + is not None + ): + data_item = indexed_categories[ + context["related_categories"][data_set["db_name"]] + ] + if len(data_item) > 0: + data_set["related_category"] = data_item + categories_to_remove[data_set["category"]].append(data_item) + + if ( + area_country is not None + and area_country in data_set["excluded_countries"] + ): + categories_to_remove[category].append(data_set) for category_name, items in categories_to_remove.items(): for item in items: From 5a28bd3defdb4d15eab08160c576f96e3b6447d0 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Thu, 21 Mar 2024 12:06:27 +0000 Subject: [PATCH 10/38] improve display of net zero declarations Include a link to the declaration if we have it and details on the scope of the declaration. 
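The details data set stores a small JSON payload per council. As a rough sketch of its shape (the scope and URL values below are hypothetical examples, mirroring the get_row_data logic added in the diff that follows):

    # Illustrative only: "url" is omitted when the source sheet has no link,
    # which is what the template's {% if dataset.data.json.url %} keys off.
    import pandas as pd

    def net_zero_details(row):
        if pd.isna(row["url"]):
            return {"scope": row["scope"]}
        return {"scope": row["scope"], "url": row["url"]}

    print(net_zero_details({"scope": "whole area", "url": None}))
    # -> {'scope': 'whole area'}
    print(net_zero_details({"scope": "council only", "url": "https://example.org/plan"}))
    # -> {'scope': 'council only', 'url': 'https://example.org/plan'}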
--- .../import_council_carbon_neutral_data.py | 25 ++++++++++++++++++- hub/templates/hub/area/_json_data.html | 9 +++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/hub/management/commands/import_council_carbon_neutral_data.py b/hub/management/commands/import_council_carbon_neutral_data.py index e642b9ff1..f44b40d9f 100644 --- a/hub/management/commands/import_council_carbon_neutral_data.py +++ b/hub/management/commands/import_council_carbon_neutral_data.py @@ -24,7 +24,8 @@ class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): "label": "Council Net Zero target date", "data_type": "integer", "category": "place", - "source_label": "Data from mySociety", + "subcategory": "date", + "source_label": "Data from mySociety.", "source": "https://pages.mysociety.org/la-plans-promises/", "source_type": "csv", "table": "areadata", @@ -35,6 +36,7 @@ class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): "is_public": True, "unit_type": "raw", "unit_distribution": "physical_area", + "fill_blanks": False, } data_sets = { @@ -42,8 +44,27 @@ class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): "defaults": defaults, "col": "year", }, + "council_net_zero_details": { + "defaults": { + **defaults, + "data_type": "json", + "label": "Net Zero target date details", + "is_filterable": False, + "is_shadable": False, + }, + "col": "scope", + }, } + def get_row_data(self, row, conf): + if conf["col"] == "year": + return row[conf["col"]] + + if pd.isna(row["url"]): + return {"scope": row["scope"]} + else: + return {"scope": row["scope"], "url": row["url"]} + def get_dataframe(self): url = get_dataset_url( repo_name="la-plans-promises", @@ -63,6 +84,8 @@ def get_dataframe(self): { "gss_code": row["gss_code"], "year": row["target"], + "scope": row["scope"], + "url": row["source_url"], } ) diff --git a/hub/templates/hub/area/_json_data.html b/hub/templates/hub/area/_json_data.html index e1c49ee0f..4ea4e1647 100644 --- a/hub/templates/hub/area/_json_data.html +++ b/hub/templates/hub/area/_json_data.html @@ -35,6 +35,15 @@ {% endfor %}
+{% elif dataset.name.lower == "net zero details" %} +

+    Scope of declaration:
+    {% if dataset.data.json.url %}
+      {{ dataset.data.json.scope }}
+    {% else %}
+      {{ dataset.data.json.scope }}
+    {% endif %}

{% elif dataset.subcategory == "groups" %}
    {% for data in dataset.data.json %} From 27ab6fc0a0fbcb9620d7db3614a8cbdd9799d186 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Thu, 21 Mar 2024 12:07:10 +0000 Subject: [PATCH 11/38] import council countries --- .../commands/import_area_countries.py | 2 +- .../commands/import_council_type.py | 22 ++++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/hub/management/commands/import_area_countries.py b/hub/management/commands/import_area_countries.py index 21bcc821b..37c7e7389 100644 --- a/hub/management/commands/import_area_countries.py +++ b/hub/management/commands/import_area_countries.py @@ -33,7 +33,7 @@ def create_data_type(self): name="country", defaults={ "data_type": "text", - "description": "The country that the constituency is in.", + "description": "", "release_date": str(date.today()), "label": "Country of the UK", "source_label": "Data from mySociety.", diff --git a/hub/management/commands/import_council_type.py b/hub/management/commands/import_council_type.py index d3bd78ce1..c3651a7f4 100644 --- a/hub/management/commands/import_council_type.py +++ b/hub/management/commands/import_council_type.py @@ -1,3 +1,5 @@ +from datetime import date + import pandas as pd from hub.models import DataSet @@ -19,6 +21,14 @@ ], } + +country_shades = [ + {"title": "England", "shader": "#f8f9fa"}, + {"title": "Wales", "shader": "#cc3517"}, + {"title": "Scotland", "shader": "#202448"}, + {"title": "Northern Ireland", "shader": "#458945"}, +] + type_map = { "STC": { "LBO": "London Borough", @@ -51,7 +61,7 @@ class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): "data_type": "text", "category": "place", "subcategory": "", - "release_date": "February 2023", + "release_date": str(date.today()), "label": "Council type", "source_label": "Data from mySociety.", "source": "https://mapit.mysociety.org/", @@ -73,6 +83,15 @@ class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): "defaults": defaults, "col": "council-type", }, + "country": { + "defaults": { + **defaults, + "description": "", + "label": "Country of the UK", + "options": country_shades, + }, + "col": "country", + }, } def get_dataframe(self): @@ -84,6 +103,7 @@ def get_dataframe(self): { "gss-code": area["codes"]["gss"], "council-type": type_map[self.area_type][area["type"]], + "country": area["country_name"], } ) From 427ce8ded78fd625c5ddf4e7b7c70f6696095fd7 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Thu, 21 Mar 2024 14:52:39 +0000 Subject: [PATCH 12/38] update LatLon generator to add columns for all area types --- hub/management/commands/base_generators.py | 132 +++++++++++++++++---- 1 file changed, 107 insertions(+), 25 deletions(-) diff --git a/hub/management/commands/base_generators.py b/hub/management/commands/base_generators.py index f20248f2f..427e193cf 100644 --- a/hub/management/commands/base_generators.py +++ b/hub/management/commands/base_generators.py @@ -15,8 +15,28 @@ RateLimitException, ) +mapit_types = { + "LBO": "STC", + "UTA": "STC", + "COI": "STC", + "LGD": "STC", + "CTY": "STC", + "MTD": "STC", + "NMD": "DIS", + "DIS": "DIS", + "WMC": "WMC", + "WMCF": "WMC23", +} + class BaseLatLonGeneratorCommand(BaseCommand): + uses_gss = False + uses_postcodes = False + out_file = None + location_col = "lat_lon" + legacy_col = "area" + cols = ["WMC", "WMC23", "STC", "DIS"] + tqdm.pandas() def get_dataframe(self): @@ -25,53 +45,112 @@ def get_dataframe(self): return df - def _process_lat_long(self, lat_lon=None, row_name=None): - lat = lat_lon[0] - lon = 
lat_lon[1] - - if not pd.isna(lat) and not pd.isna(lon): + def _process_location(self, lat_lon=None, postcode=None, row_name=None): + lat, lon = None, None + if lat_lon is not None: + lat = lat_lon[0] + lon = lat_lon[1] + + cols = [self.legacy_col, *self.cols] + if (self.uses_postcodes and not pd.isna(postcode)) or ( + not pd.isna(lat) and not pd.isna(lon) + ): + areas = {} try: mapit = MapIt() - gss_codes = mapit.wgs84_point_to_gss_codes(lon, lat) - - area = Area.objects.filter(gss__in=gss_codes).first() - if area: - return area.name + if self.uses_postcodes: + gss_codes = mapit.postcode_point_to_gss_codes_with_type(postcode) else: - return None + gss_codes = mapit.wgs84_point_to_gss_codes_with_type(lon, lat) + + for area_type, code in gss_codes.items(): + if mapit_types.get(area_type, None) is not None: + if self.uses_gss: + areas[mapit_types[area_type]] = code + else: + area = Area.objects.filter( + gss=code, area_type__code=mapit_types[area_type] + ).first() + areas[mapit_types[area_type]] = area.name + else: + continue except ( NotFoundException, BadRequestException, InternalServerErrorException, ForbiddenException, ) as error: - print(f"Error fetching row {row_name} with {lat}, {lon}: {error}") - return None + location_data = lat_lon + if self.uses_postcodes: + location_data = postcode + self.stderr.write( + f"Error fetching row {row_name} with {location_data}: {error}" + ) + return pd.Series([None for t in cols], index=cols) except RateLimitException as error: - print(f"Mapit Error - {error}, waiting for a minute") + self.stderr.write(f"Mapit Error - {error}, waiting for a minute") sleep(60) return False + + areas[self.legacy_col] = areas.get("WMC", None) + vals = [areas.get(t, None) for t in cols] + return pd.Series(vals, index=cols) else: - print(f"missing lat or lon for row {row_name}") - return None + self.stderr.write(f"missing location data for row {row_name}") + return pd.Series([None for t in cols], index=cols) - def process_lat_long(self, lat_lon=None, row_name=None): - success = self._process_lat_long(lat_lon=lat_lon, row_name=row_name) + def process_location(self, lat_lon=None, postcode=None, row_name=None): + success = self._process_location( + lat_lon=lat_lon, postcode=postcode, row_name=row_name + ) # retry once if it fails so we can catch rate limit errors if success is False: - return self._process_lat_long(lat_lon=lat_lon, row_name=row_name) + return self._process_location( + lat_lon=lat_lon, postcode=postcode, row_name=row_name + ) else: return success + def get_location_from_row(self, row): + if self.uses_postcodes: + return {"postcode": row["postcode"]} + else: + return {"lat_lon": [row["lat"], row["lon"]]} + def process_data(self, df): if not self._quiet: - self.stdout.write("Generating Area name from lat + lon values") + self.stdout.write("Generating Area details from location values") - df["area"] = df.progress_apply( - lambda row: self.process_lat_long( - self.get_lat_lon_from_row(row), row[self.row_name] - ), - axis=1, + if not self._ignore and self.out_file is not None: + try: + # check that we've got all the output we're expecting before using + # the old values + old_df = pd.read_csv(self.out_file) + usecols = list(set(self.cols).intersection(df.columns)) + if len(usecols) == len(self.cols): + old_df = pd.read_csv( + self.out_file, usecols=[self.lat_lon_row, *self.cols] + ) + location_lookup = { + row[self.location_col]: row[self.legacy_col] + for index, row in old_df.iterrows() + } + if not self._quiet: + self.stdout.write("Reading codes from existing 
file") + df[self.legacy_col] = df.apply( + lambda row: location_lookup.get((row[self.location_col]), None), + axis=1, + ) + except FileNotFoundError: + self.stderr.write("No existing file.") + + df = df.join( + df.progress_apply( + lambda row: self.process_location( + row_name=row[self.row_name], **self.get_location_from_row(row) + ), + axis=1, + ) ) return df @@ -89,6 +168,9 @@ def add_arguments(self, parser): def handle(self, quiet=False, ignore=False, *args, **options): self._quiet = quiet + + if not self._quiet: + self.stdout.write(self.message) self._ignore = ignore df = self.get_dataframe() out_df = self.process_data(df) From fe1f54913804ea5f63869f729e706a7474446bfd Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Thu, 21 Mar 2024 14:53:43 +0000 Subject: [PATCH 13/38] add mapit calls to get GSS codes by area type from lat/lon and pc --- utils/mapit.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/utils/mapit.py b/utils/mapit.py index bc55753e3..10c4d5036 100644 --- a/utils/mapit.py +++ b/utils/mapit.py @@ -75,6 +75,15 @@ def postcode_point_to_gss_codes(self, pc): gss_codes.append(area["codes"]["gss"]) return gss_codes + def postcode_point_to_gss_codes_with_type(self, pc): + url = self.postcode_url % (self.base, pc, settings.MAPIT_API_KEY) + data = self.get(url) + gss_codes = {} + for area in data["areas"].values(): + if "gss" in area["codes"]: + gss_codes[area["type"]] = area["codes"]["gss"] + return gss_codes + def wgs84_point_to_gss_codes(self, lon, lat): url = self.wgs84_url % (self.base, lon, lat, settings.MAPIT_API_KEY) data = self.get(url) @@ -84,6 +93,15 @@ def wgs84_point_to_gss_codes(self, lon, lat): gss_codes.append(area["codes"]["gss"]) return gss_codes + def wgs84_point_to_gss_codes_with_type(self, lon, lat): + url = self.wgs84_url % (self.base, lon, lat, settings.MAPIT_API_KEY) + data = self.get(url) + gss_codes = {} + for area in data.values(): + if "gss" in area["codes"]: + gss_codes[area["type"]] = area["codes"]["gss"] + return gss_codes + def areas_of_type(self, types): url = self.areas_url % (self.base, ",".join(types), settings.MAPIT_API_KEY) data = self.get(url) From dc0b5d971db66cd7aa076c5650bf2239f19323e9 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Thu, 21 Mar 2024 14:57:08 +0000 Subject: [PATCH 14/38] fix base constituency count importer to handle multiple area types use a lookup for the cons column if present --- hub/management/commands/base_importers.py | 37 ++++++++++++++----- .../commands/import_hnh_polling_data.py | 3 +- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/hub/management/commands/base_importers.py b/hub/management/commands/base_importers.py index 0e1b7dcb1..e592b28d9 100644 --- a/hub/management/commands/base_importers.py +++ b/hub/management/commands/base_importers.py @@ -1,4 +1,3 @@ -from functools import cache from time import sleep from django.core.management.base import BaseCommand @@ -27,6 +26,7 @@ def handle(self, *args, **options): class BaseAreaImportCommand(BaseCommand): area_type = "WMC" + uses_gss = False def __init__(self): super().__init__() @@ -53,10 +53,15 @@ def delete_data(self): data_type=data_type, area__area_type__code=self.area_type ).delete() - @cache def get_area_type(self): return AreaType.objects.get(code=self.area_type) + def get_cons_col(self): + if hasattr(self, "cons_col_map"): + return self.cons_col_map[self.area_type] + + return self.cons_col + def add_data_sets(self, df=None): for name, config in self.data_sets.items(): label = self.get_label(config) @@ -175,6 
+180,15 @@ def convert_to_new_con(self): data_type, delete_old=True, quiet=self._quiet ) + def handle(self, quiet=False, *args, **kwargs): + self._quiet = quiet + self.add_data_sets() + self.delete_data() + self.process_data() + self.update_averages() + self.update_max_min() + self.convert_to_new_con() + class BaseImportFromDataFrameCommand(BaseAreaImportCommand): uses_gss = True @@ -278,26 +292,29 @@ def process_lat_long(self, lat=None, lon=None, row_name=None): class BaseConstituencyGroupListImportCommand(BaseAreaImportCommand): - use_gss = False + do_not_convert = True def process_data(self): df = self.get_df() if not self._quiet: - self.stdout.write(self.message) + self.stdout.write(f"{self.message} ({self.area_type})") group_by = "constituency" - if self.use_gss: + if self.uses_gss: group_by = "gss" + if hasattr(self, "area_types"): + group_by = self.cons_col_map[self.area_type] for lookup, data in tqdm(df.groupby(group_by)): try: area = Area.objects.filter(area_type__code=self.area_type) - if self.use_gss: + if self.uses_gss: area = area.get(gss=lookup) else: area = area.get(name=lookup) except Area.DoesNotExist: + self.stderr.write(f"no area found for {lookup} and {self.area_type}") continue json = [] @@ -328,16 +345,18 @@ def handle(self, quiet=False, *args, **kwargs): class BaseConstituencyCountImportCommand(BaseAreaImportCommand): + do_not_convert = True + def set_data_type(self): self.data_type = list(self.data_types.values())[0] def get_dataframe(self): df = pd.read_csv(self.data_file) - df = df.astype({self.cons_col: "str"}) + df = df.astype({self.get_cons_col(): "str"}) return df def _get_areas_from_row(self, row): - value = row[self.cons_col] + value = row[self.get_cons_col()] if self.uses_gss: areas = Area.objects.filter(gss__in=value.split(",")) else: @@ -347,7 +366,7 @@ def _get_areas_from_row(self, row): def process_data(self, df): if not self._quiet: - self.stdout.write(self.message) + self.stdout.write(f"{self.message} ({self.area_type})") for index, row in tqdm(df.iterrows(), disable=self._quiet, total=df.shape[0]): areas = self._get_areas_from_row(row) diff --git a/hub/management/commands/import_hnh_polling_data.py b/hub/management/commands/import_hnh_polling_data.py index cbf461575..5dc051df1 100644 --- a/hub/management/commands/import_hnh_polling_data.py +++ b/hub/management/commands/import_hnh_polling_data.py @@ -349,6 +349,7 @@ def log(self, message): def extract_and_save_data(self): self.log(self.message) + area_type = self.get_area_type() for file in self.files: self.log(file["defaults"]["label"]) @@ -371,7 +372,7 @@ def extract_and_save_data(self): data_type, created = DataType.objects.update_or_create( data_set=data_set, name=data_type_slug, - area_type=self.get_area_type(), + area_type=area_type, defaults={ "data_type": "percent", "label": col["label"], From 0e4c4baf9e13e6ed8448c5f6aa86058149169b20 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Thu, 21 Mar 2024 14:58:05 +0000 Subject: [PATCH 15/38] import GWGW 2022 council event details --- .../commands/generate_gbgw_2022_data.py | 7 ++-- .../commands/generate_gbgw_2023_data.py | 5 ++- .../commands/import_council_gbgw_events.py | 39 +++++++++++++++++++ 3 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 hub/management/commands/import_council_gbgw_events.py diff --git a/hub/management/commands/generate_gbgw_2022_data.py b/hub/management/commands/generate_gbgw_2022_data.py index 59515c3fc..8263bf0c5 100644 --- a/hub/management/commands/generate_gbgw_2022_data.py +++ 
b/hub/management/commands/generate_gbgw_2022_data.py @@ -4,11 +4,12 @@ class Command(BaseLatLonGeneratorCommand): - help = "Generate CSV file of WI Groups'" + help = "Generate CSV file of GBGW 2022 events" + message = "Generating a CSV of areas for 2022 GBGW events" data_file = settings.BASE_DIR / "data" / "gbgw_events.csv" out_file = settings.BASE_DIR / "data" / "gbgw_events_processed.csv" row_name = "Organisation name" - def get_lat_lon_from_row(self, row): - return row.Latitude, row.Longitude + def get_location_from_row(self, row): + return {"lat_lon": [row["Latitude"], row["Longitude"]]} diff --git a/hub/management/commands/generate_gbgw_2023_data.py b/hub/management/commands/generate_gbgw_2023_data.py index bec4299cf..eac09c090 100644 --- a/hub/management/commands/generate_gbgw_2023_data.py +++ b/hub/management/commands/generate_gbgw_2023_data.py @@ -5,10 +5,11 @@ class Command(BaseLatLonGeneratorCommand): help = "Generate CSV file of GBGW 2023 events with area name'" + message = "Generating a CSV of areas for 2022 GBGW events" data_file = settings.BASE_DIR / "data" / "gbgw_events_23.csv" out_file = settings.BASE_DIR / "data" / "gbgw_events_23_processed.csv" row_name = "Organisation" - def get_lat_lon_from_row(self, row): - return row.Lat, row.Long + def get_location_from_row(self, row): + return {"lat_lon": [row.Lat, row.Long]} diff --git a/hub/management/commands/import_council_gbgw_events.py b/hub/management/commands/import_council_gbgw_events.py new file mode 100644 index 000000000..d17bb1172 --- /dev/null +++ b/hub/management/commands/import_council_gbgw_events.py @@ -0,0 +1,39 @@ +from django.conf import settings + +from hub.models import DataSet + +from .base_importers import BaseConstituencyCountImportCommand, MultipleAreaTypesMixin + + +class Command(MultipleAreaTypesMixin, BaseConstituencyCountImportCommand): + help = "Import data about number of GBGW events per council" + message = "Importing 2022 GBGW events" + uses_gss = False + + data_file = settings.BASE_DIR / "data" / "gbgw_events_processed.csv" + cons_col_map = { + "STC": "STC", + "DIS": "DIS", + } + area_types = ["STC", "DIS"] + + data_sets = { + "council_gbgw_2022_event_count": { + "defaults": { + "label": "Number of Great Big Green Week 2022 events", + "release_date": "October 2022", + "data_type": "integer", + "category": "movement", + "subcategory": "events", + "source_label": "Data from The Climate Coalition.", + "source": "https://greatbiggreenweek.com/", + "source_type": "google sheet", + "data_url": "", + "table": "areadata", + "default_value": 10, + "comparators": DataSet.numerical_comparators(), + "unit_type": "raw", + "unit_distribution": "people_in_area", + } + } + } From 7cea94815513bb5d4c384053881b2ea5549e1e38 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Mon, 25 Mar 2024 14:49:14 +0000 Subject: [PATCH 16/38] add council areas to WI CSV generator --- .../commands/generate_wi_groups_csv.py | 105 ++---------------- 1 file changed, 10 insertions(+), 95 deletions(-) diff --git a/hub/management/commands/generate_wi_groups_csv.py b/hub/management/commands/generate_wi_groups_csv.py index 4dc896d99..979b14b27 100644 --- a/hub/management/commands/generate_wi_groups_csv.py +++ b/hub/management/commands/generate_wi_groups_csv.py @@ -1,29 +1,21 @@ import re -from time import sleep from django.conf import settings -from django.core.management.base import BaseCommand import pandas as pd import requests -from tqdm import tqdm -from hub.models import Area -from utils.mapit import ( - BadRequestException, - 
ForbiddenException, - InternalServerErrorException, - MapIt, - NotFoundException, - RateLimitException, -) +from .base_generators import BaseLatLonGeneratorCommand GROUPS_URL = "https://wi-search.squiz.cloud/s/search.json?collection=nfwi-federations&profile=_default&query=!null&sort=prox&sort=prox&start_rank=1&origin=54.093409,-2.89479&maxdist=9999&num_ranks=9999" -class Command(BaseCommand): +class Command(BaseLatLonGeneratorCommand): help = "Generate CSV file of WI Groups'" - tqdm.pandas() + message = "Generating a CSV of areas for WI groups" + + row_name = "group_name" + uses_gss = True out_file = settings.BASE_DIR / "data" / "wi_groups.csv" @@ -40,88 +32,11 @@ def get_dataframe(self): df.columns = ["group_name", "url", "lat_lon"] return df - def _process_lat_long(self, lat_lon=None, row_name=None): + def get_location_from_row(self, row): try: - lat, lon = re.split(r"[,;]", lat_lon) + lat, lon = re.split(r"[,;]", row.lat_lon) except ValueError: - print(f"bad lat_lon for row {row_name} - {lat_lon}") - return None - if not pd.isna(lat) and not pd.isna(lon): - try: - mapit = MapIt() - gss_codes = mapit.wgs84_point_to_gss_codes(lon, lat) - - area = Area.objects.filter(gss__in=gss_codes).first() - if area: - return area.name - else: - return None - except ( - NotFoundException, - BadRequestException, - InternalServerErrorException, - ForbiddenException, - ) as error: - print(f"Error fetching row {row_name} with {lat}, {lon}: {error}") - return None - except RateLimitException as error: - print(f"Mapit Error - {error}, waiting for a minute") - sleep(60) - return False - else: - print(f"missing lat or lon for row {row_name}") + print(f"bad lat_lon for row {row[self.row_name]}") return None - def process_lat_long(self, lat_lon=None, row_name=None): - success = self._process_lat_long(lat_lon=lat_lon, row_name=row_name) - # retry once if it fails so we can catch rate limit errors - if success is False: - return self._process_lat_long(lat_lon=lat_lon, row_name=row_name) - else: - return success - - def process_data(self, df): - if not self._quiet: - self.stdout.write("Generating GSS codes from lat + lon values") - if not self._ignore: - # Download existing csv, if it exists, so that data isn't updated redundantly - try: - old_df = pd.read_csv(self.out_file, usecols=["lat_lon", "area"]) - lat_long_lookup = { - row.lat_lon: row.area for index, row in old_df.iterrows() - } - if not self._quiet: - self.stdout.write("Reading codes from existing file") - df["area"] = df.apply( - lambda row: lat_long_lookup.get((row.lat_lon), None), axis=1 - ) - except FileNotFoundError: - print("No existing file.") - - if not self._quiet: - self.stdout.write("Generating GSS codes for new WI groups") - df["area"] = df.progress_apply( - lambda row: self.process_lat_long(row.lat_lon, row.group_name) - if "area" not in row - else row["area"], - axis=1, - ) - return df - - def save_data(self, df): - df.to_csv(self.out_file, index=False) - - def add_arguments(self, parser): - parser.add_argument( - "-q", "--quiet", action="store_true", help="Silence progress bars." 
- ) - parser.add_argument( - "-i", "--ignore", action="store_true", help="Ignore existing data file" - ) - - def handle(self, quiet=False, ignore=False, *args, **options): - self._quiet = quiet - self._ignore = ignore - df = self.get_dataframe() - out_df = self.process_data(df) - self.save_data(out_df) + return {"lat_lon": [lat, lon]} From 7cc910acdf873315df027b4459ab2f61d87dc6cc Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Mon, 25 Mar 2024 14:49:42 +0000 Subject: [PATCH 17/38] add council areas to foodbank CSV generator --- .../commands/generate_foodbank_count_csv.py | 73 ++----------------- 1 file changed, 7 insertions(+), 66 deletions(-) diff --git a/hub/management/commands/generate_foodbank_count_csv.py b/hub/management/commands/generate_foodbank_count_csv.py index 6ebf8c798..1fddf6362 100644 --- a/hub/management/commands/generate_foodbank_count_csv.py +++ b/hub/management/commands/generate_foodbank_count_csv.py @@ -1,30 +1,25 @@ import json -from time import sleep from django.conf import settings -from django.core.management.base import BaseCommand import pandas as pd -from tqdm import tqdm -from utils.mapit import ( - BadRequestException, - ForbiddenException, - InternalServerErrorException, - MapIt, - NotFoundException, - RateLimitException, -) +from .base_generators import BaseLatLonGeneratorCommand -class Command(BaseCommand): +class Command(BaseLatLonGeneratorCommand): help = "Generate CSV file of foodbanks with constituency from trussell trust" + message = "Generating a CSV of areas for foodbanks" data_file = ( settings.BASE_DIR / "data" / "trussell-trust-foodbank-groups-and-branches.json" ) out_file = settings.BASE_DIR / "data" / "foodbanks_per_constituency.csv" + uses_gss = True + legacy_col = "gss" + row_name = "name" + def get_dataframe(self): out_data = [] with open(self.data_file) as f: @@ -54,57 +49,3 @@ def get_dataframe(self): df = pd.DataFrame(columns=["name", "lat", "lon"], data=out_data) return df - - def process_data(self, df): - if not self._quiet: - self.stdout.write("Generating foodbank per constituency data") - - out = [] - - for index, row in tqdm(df.iterrows(), disable=self._quiet, total=df.shape[0]): - name = row["name"] - lat = row["lat"] - lon = row["lon"] - - if not pd.isna(lat) and not pd.isna(lon): - try: - mapit = MapIt() - gss_codes = mapit.wgs84_point_to_gss_codes(lon, lat) - - except ( - NotFoundException, - BadRequestException, - InternalServerErrorException, - ForbiddenException, - ) as error: - print(f"Error fetching row {name} with {lat}, {lon}: {error}") - continue - except RateLimitException as error: - print(f"Mapit Error - {error}, waiting for a minute") - sleep(60) - continue - else: - print(f"missing lat or lon for row {name}") - continue - - out.append([name, ",".join(gss_codes)]) - - if index > 0 and index % 50 == 0: - sleep(10) - - out_df = pd.DataFrame(columns=["name", "gss"], data=out) - return out_df - - def save_data(self, df): - df.to_csv(self.out_file) - - def add_arguments(self, parser): - parser.add_argument( - "-q", "--quiet", action="store_true", help="Silence progress bars." 
- ) - - def handle(self, quiet=False, *args, **options): - self._quiet = quiet - df = self.get_dataframe() - out_df = self.process_data(df) - self.save_data(out_df) From 07ed60ed531128bf05eb39eb6d69e6af1c8fcf99 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Mon, 25 Mar 2024 14:50:25 +0000 Subject: [PATCH 18/38] add council areas to national trust property CSV generator --- .../generate_national_trust_properties_csv.py | 105 +----------------- 1 file changed, 5 insertions(+), 100 deletions(-) diff --git a/hub/management/commands/generate_national_trust_properties_csv.py b/hub/management/commands/generate_national_trust_properties_csv.py index 45fcb7a3c..8c6c0cea2 100644 --- a/hub/management/commands/generate_national_trust_properties_csv.py +++ b/hub/management/commands/generate_national_trust_properties_csv.py @@ -1,30 +1,21 @@ import json -from time import sleep from django.conf import settings -from django.core.management.base import BaseCommand import pandas as pd -from tqdm import tqdm -from hub.models import Area -from utils.mapit import ( - BadRequestException, - ForbiddenException, - InternalServerErrorException, - MapIt, - NotFoundException, - RateLimitException, -) +from .base_generators import BaseLatLonGeneratorCommand -class Command(BaseCommand): +class Command(BaseLatLonGeneratorCommand): help = "Generate CSV file of National Trust Properties from JSON" - tqdm.pandas() + message = "Generating a CSV of areas for NT properties" in_file = settings.BASE_DIR / "data" / "national_trust_properties.json" out_file = settings.BASE_DIR / "data" / "national_trust_properties.csv" + row_name = "name" + def get_dataframe(self): with open(self.in_file) as f: data = json.load(f) @@ -38,95 +29,9 @@ def get_dataframe(self): "lat_lon": f"{p['POINT_X']},{p['POINT_Y']}", "lat": p["POINT_Y"], "lon": p["POINT_X"], - "area": "", } ) df = pd.DataFrame.from_records(properties) return df - - def _process_lat_long(self, lat=None, lon=None, row_name=None): - - if not pd.isna(lat) and not pd.isna(lon): - try: - mapit = MapIt() - gss_codes = mapit.wgs84_point_to_gss_codes(lon, lat) - - area = Area.objects.filter(gss__in=gss_codes).first() - if area: - return area.name - else: - return None - except ( - NotFoundException, - BadRequestException, - InternalServerErrorException, - ForbiddenException, - ) as error: - print(f"Error fetching row {row_name} with {lat}, {lon}: {error}") - return None - except RateLimitException as error: - print(f"Mapit Error - {error}, waiting for a minute") - sleep(60) - return False - else: - print(f"missing lat or lon for row {row_name}") - return None - - def process_lat_long(self, lat=None, lon=None, row_name=None): - success = self._process_lat_long(lat=lat, lon=lon, row_name=row_name) - # retry once if it fails so we can catch rate limit errors - if success is False: - return self._process_lat_long(lat=lat, lon=lon, row_name=row_name) - else: - return success - - def process_data(self, df): - if not self._quiet: - self.stdout.write("Generating GSS codes from lat + lon values") - if not self._ignore: - # Download existing csv, if it exists, so that data isn't updated redundantly - try: - old_df = pd.read_csv( - self.out_file, usecols=["lat_lon", "lat", "lon", "area"] - ) - lat_long_lookup = { - row.lat_lon: row.area for index, row in old_df.iterrows() - } - if not self._quiet: - self.stdout.write("Reading codes from existing file") - df["area"] = df.apply( - lambda row: lat_long_lookup.get((row.lat_lon), None), axis=1 - ) - except FileNotFoundError: - print("No existing 
file.") - - if not self._quiet: - self.stdout.write("Generating GSS codes for new national trust properties") - - df["area"] = df.progress_apply( - lambda row: self.process_lat_long(row.lat, row.lon, row.name) - if pd.isna(row.area) - else row.area, - axis=1, - ) - return df - - def save_data(self, df): - df.to_csv(self.out_file, index=False) - - def add_arguments(self, parser): - parser.add_argument( - "-q", "--quiet", action="store_true", help="Silence progress bars." - ) - parser.add_argument( - "-i", "--ignore", action="store_true", help="Ignore existing data file" - ) - - def handle(self, quiet=False, ignore=False, *args, **options): - self._quiet = quiet - self._ignore = ignore - df = self.get_dataframe() - out_df = self.process_data(df) - self.save_data(out_df) From c4a0d2d173e16d1ef759d6087be3d33776816fd5 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Mon, 25 Mar 2024 14:51:47 +0000 Subject: [PATCH 19/38] add council areas to RSPB reserve CSV generator --- .../generate_rspb_nature_reserves_csv.py | 71 ++----------------- 1 file changed, 7 insertions(+), 64 deletions(-) diff --git a/hub/management/commands/generate_rspb_nature_reserves_csv.py b/hub/management/commands/generate_rspb_nature_reserves_csv.py index 0d5e65ada..c9a75121e 100644 --- a/hub/management/commands/generate_rspb_nature_reserves_csv.py +++ b/hub/management/commands/generate_rspb_nature_reserves_csv.py @@ -1,80 +1,23 @@ -from time import sleep - from django.conf import settings -from django.core.management.base import BaseCommand import pandas as pd -from tqdm import tqdm -from utils.mapit import ( - BadRequestException, - ForbiddenException, - InternalServerErrorException, - MapIt, - NotFoundException, - RateLimitException, -) +from .base_generators import BaseLatLonGeneratorCommand -class Command(BaseCommand): +class Command(BaseLatLonGeneratorCommand): help = "Generate CSV file of RSPB nature reserves in each constituency" + message = "Generating a CSV of areas for RSPB nature reserves" data_file = settings.BASE_DIR / "data" / "rspb_reserves_centroids.csv" out_file = settings.BASE_DIR / "data" / "rspb_reserves.csv" + uses_gss = True + row_name = "name" + legacy_col = "gss" + def get_dataframe(self): df = pd.read_csv(self.data_file, usecols=["Name", "xcoord", "ycoord"]) df = df.rename(columns={"Name": "name", "xcoord": "lon", "ycoord": "lat"}) df.name = df.name.str.title() return df - - def process_data(self, df): - out = [] - if not self._quiet: - self.stdout.write("Generating processed RSPB reserve file") - for index, row in tqdm(df.iterrows(), disable=self._quiet, total=df.shape[0]): - name = row["name"] - lat = row["lat"] - lon = row["lon"] - - if not pd.isna(lat) and not pd.isna(lon): - try: - mapit = MapIt() - gss_codes = mapit.wgs84_point_to_gss_codes(lon, lat) - - except ( - NotFoundException, - BadRequestException, - InternalServerErrorException, - ForbiddenException, - ) as error: - print(f"Error fetching row {name} with {lat}, {lon}: {error}") - continue - except RateLimitException as error: - print(f"Mapit Error - {error}, waiting for a minute") - sleep(60) - continue - else: - print(f"missing lat or lon for row {name}") - continue - out.append([name, ",".join(gss_codes)]) - - if index > 0 and index % 50 == 0: - sleep(10) - - out_df = pd.DataFrame(columns=["name", "gss"], data=out) - return out_df - - def save_data(self, df): - df.to_csv(self.out_file) - - def add_arguments(self, parser): - parser.add_argument( - "-q", "--quiet", action="store_true", help="Silence progress bars." 
- ) - - def handle(self, quiet=False, *args, **options): - self._quiet = quiet - df = self.get_dataframe() - out_df = self.process_data(df) - self.save_data(out_df) From 0dc6821fce54bfb8177076ec5e7b9259163b84af Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Mon, 25 Mar 2024 18:01:43 +0000 Subject: [PATCH 20/38] add council areas to power postcode generator --- ...nerate_aid_alliance_power_postcodes_csv.py | 73 +++---------------- 1 file changed, 10 insertions(+), 63 deletions(-) diff --git a/hub/management/commands/generate_aid_alliance_power_postcodes_csv.py b/hub/management/commands/generate_aid_alliance_power_postcodes_csv.py index 06c03edfc..4c49e3e86 100644 --- a/hub/management/commands/generate_aid_alliance_power_postcodes_csv.py +++ b/hub/management/commands/generate_aid_alliance_power_postcodes_csv.py @@ -1,32 +1,27 @@ -from time import sleep - from django.conf import settings -from django.core.management.base import BaseCommand import pandas as pd import requests from bs4 import BeautifulSoup -from tqdm import tqdm -from utils.mapit import ( - BadRequestException, - ForbiddenException, - InternalServerErrorException, - MapIt, - NotFoundException, - RateLimitException, -) +from .base_generators import BaseLatLonGeneratorCommand POSTCODES_URL = ( "http://www.google.com/maps/d/kml?forcekml=1&mid=15b_tQI0t58rLcBTgFytu2e73jyKrrxFr" ) -class Command(BaseCommand): +class Command(BaseLatLonGeneratorCommand): help = "Generate CSV file of Aid Alliance's 'power postcodes'" + message = "Generating a CSV of areas for aid alliance power postcodes" out_file = settings.BASE_DIR / "data" / "aid_alliance_power_postcodes.csv" + row_name = "name" + uses_gss = True + legacy_col = "gss" + uses_postcodes = True + def get_dataframe(self): response = requests.get(POSTCODES_URL) soup = BeautifulSoup(response.content, "xml") @@ -49,53 +44,5 @@ def get_dataframe(self): df = df.applymap(str.strip) return df - def get_gss_code(self, mapit, postcode): - try: - gss_code = mapit.postcode_point_to_gss_codes(postcode) - except ( - NotFoundException, - BadRequestException, - InternalServerErrorException, - ForbiddenException, - ) as error: - print(f"Error fetching row postcode: {postcode} - {error} raised") - return None - except RateLimitException as error: - print(f"Mapit Error - {error}, waiting for a minute") - sleep(60) - return self.get_gss_code(mapit, postcode) - return ",".join(gss_code) - - def get_all_gss_codes_from_mapit(self, df): - df = df.copy() - - gss_codes = [] - mapit = MapIt() - for postcode in tqdm(df.postcode): - gss_code = None - if postcode: - gss_code = self.get_gss_code(mapit, postcode) - gss_codes.append(gss_code) - return pd.Series(gss_codes, name="gss") - - def process_data(self, df): - if not self._quiet: - self.stdout.write("Generating GSS codes from postcodes") - - gss_codes = self.get_all_gss_codes_from_mapit(df) - df = pd.concat([df, gss_codes], axis=1) - return df - - def save_data(self, df): - df.to_csv(self.out_file, index=False) - - def add_arguments(self, parser): - parser.add_argument( - "-q", "--quiet", action="store_true", help="Silence progress bars." 
- ) - - def handle(self, quiet=False, *args, **options): - self._quiet = quiet - df = self.get_dataframe() - out_df = self.process_data(df) - self.save_data(out_df) + def get_location_from_row(self, row): + return {"postcode": row["postcode"]} From 50cd90e6328d8e7dad2b6e2b5d047a35561795d6 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Mon, 25 Mar 2024 18:02:05 +0000 Subject: [PATCH 21/38] add council areas to onshore windfarm generator --- .../commands/generate_onshore_windfarm_csv.py | 76 +++---------------- 1 file changed, 11 insertions(+), 65 deletions(-) diff --git a/hub/management/commands/generate_onshore_windfarm_csv.py b/hub/management/commands/generate_onshore_windfarm_csv.py index 7794c6a4a..7c7f2c936 100644 --- a/hub/management/commands/generate_onshore_windfarm_csv.py +++ b/hub/management/commands/generate_onshore_windfarm_csv.py @@ -1,30 +1,25 @@ import re -from time import sleep from django.conf import settings -from django.core.management.base import BaseCommand import pandas as pd -from tqdm import tqdm -from utils.mapit import ( - BadRequestException, - ForbiddenException, - InternalServerErrorException, - MapIt, - NotFoundException, - RateLimitException, -) +from .base_generators import BaseLatLonGeneratorCommand -class Command(BaseCommand): +class Command(BaseLatLonGeneratorCommand): help = "Generate CSV file of windfarms with constituency from wikipedia" + message = "Generating a CSV of areas for onshore windfarms" url = ( "https://en.wikipedia.org/wiki/List_of_onshore_wind_farms_in_the_United_Kingdom" ) out_file = settings.BASE_DIR / "data" / "windfarms_per_constituency.csv" + row_name = "Wind farm" + legacy_col = "gss" + uses_gss = True + def get_dataframe(self): dfs = pd.read_html(self.url, match="Wind Farm", displayed_only=False) @@ -33,57 +28,8 @@ def get_dataframe(self): return df - def process_data(self, df): - if not self._quiet: - self.stdout.write("Turning wikipedia data in cons") - - out = [] - - for index, row in tqdm(df.iterrows(), disable=self._quiet, total=df.shape[0]): - _, _, decimal = row["Coordinates"].split("/") - lat, lon = re.findall(r"[\-\d.]+", decimal)[0:2] - - name = row["Wind farm"] - - if not pd.isna(lat) and not pd.isna(lon): - try: - mapit = MapIt() - gss_codes = mapit.wgs84_point_to_gss_codes(lon, lat) - - except ( - NotFoundException, - BadRequestException, - InternalServerErrorException, - ForbiddenException, - ) as error: - print(f"Error fetching row {name} with {lat}, {lon}: {error}") - return None - except RateLimitException as error: - print(f"Mapit Error - {error}, waiting for a minute") - sleep(60) - return False - else: - print(f"missing lat or lon for row {name}") - return None - - out.append([name, ",".join(gss_codes), row["Cap. (MW)"]]) - - if index > 0 and index % 50 == 0: - sleep(10) - - out_df = pd.DataFrame(columns=["name", "gss", "capacity"], data=out) - return out_df - - def save_data(self, df): - df.to_csv(self.out_file) - - def add_arguments(self, parser): - parser.add_argument( - "-q", "--quiet", action="store_true", help="Silence progress bars." 
- ) + def get_location_from_row(self, row): + _, _, decimal = row["Coordinates"].split("/") + lat, lon = re.findall(r"[\-\d.]+", decimal)[0:2] - def handle(self, quiet=False, *args, **options): - self._quiet = quiet - df = self.get_dataframe() - out_df = self.process_data(df) - self.save_data(out_df) + return {"lat_lon": [lat, lon]} From f0c082eb9bffc491761fd2255509335b1717e921 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Mon, 25 Mar 2024 18:02:28 +0000 Subject: [PATCH 22/38] add council areas to save the children generator --- .../generate_save_the_children_csv.py | 73 +++---------------- 1 file changed, 9 insertions(+), 64 deletions(-) diff --git a/hub/management/commands/generate_save_the_children_csv.py b/hub/management/commands/generate_save_the_children_csv.py index c49d9cf25..ddd9e23d9 100644 --- a/hub/management/commands/generate_save_the_children_csv.py +++ b/hub/management/commands/generate_save_the_children_csv.py @@ -1,28 +1,21 @@ -from time import sleep - from django.conf import settings -from django.core.management.base import BaseCommand import pandas as pd -from tqdm import tqdm -from hub.models import Area -from utils.mapit import ( - BadRequestException, - ForbiddenException, - InternalServerErrorException, - MapIt, - NotFoundException, - RateLimitException, -) +from .base_generators import BaseLatLonGeneratorCommand -class Command(BaseCommand): +class Command(BaseLatLonGeneratorCommand): help = "Generate a cleaned CSV file of the number of Save the Children shops per constituency" + message = "Generating a CSV of areas for Save the Children shops" data_file = settings.BASE_DIR / "data" / "save_the_children_shops.csv" out_file = settings.BASE_DIR / "data" / "save_the_children_shops_processed.csv" + uses_gss = True + uses_postcodes = True + row_name = "shop_code" + def get_dataframe(self): df = pd.read_csv( self.data_file, @@ -45,60 +38,12 @@ def get_dataframe(self): "postcode", "constituency", ] - return df - - def get_gss_code(self, mapit, postcode): - try: - gss_code = mapit.postcode_point_to_gss_codes(postcode) - except ( - NotFoundException, - BadRequestException, - InternalServerErrorException, - ForbiddenException, - ) as error: - print(f"Error fetching row postcode: '{postcode}' - Error: {error}") - return None - except RateLimitException as error: - print(f"Mapit Error - {error}, waiting for a minute") - sleep(60) - return self.get_gss_code(mapit, postcode) - return ",".join(gss_code) - - def process_data(self, df): - if not self._quiet: - self.stdout.write("Generating save the children count") df = df.dropna(subset="postcode") df.constituency = df.constituency.str.strip() df.postcode = df.postcode.str.strip() - mapit = MapIt() - gss = [] - for index, row in tqdm(df.iterrows()): - area = [] - # If the constituency isn't null - if isinstance(row.constituency, str): - area = Area.objects.filter(name__iexact=row.constituency) - - if len(area) != 0: - gss.append(area[0].gss) - else: - # If the constituency doesn't work, use MapIt to enter the GSS code instead - gss.append(self.get_gss_code(mapit, row.postcode)) - - df["gss"] = gss return df - def save_data(self, df): - df.to_csv(self.out_file) - - def add_arguments(self, parser): - parser.add_argument( - "-q", "--quiet", action="store_true", help="Silence progress bars." 
- ) - - def handle(self, quiet=False, *args, **options): - self._quiet = quiet - df = self.get_dataframe() - out_df = self.process_data(df) - self.save_data(out_df) + def get_location_from_row(self, row): + return {"postcode": row.postcode} From 0bb89bd10f4100cb66b4373086188a07e0274309 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Mon, 25 Mar 2024 18:02:57 +0000 Subject: [PATCH 23/38] add council areas to wildlife trust reserves generator --- ...rate_wildlife_trust_nature_reserves_csv.py | 68 +++---------------- 1 file changed, 10 insertions(+), 58 deletions(-) diff --git a/hub/management/commands/generate_wildlife_trust_nature_reserves_csv.py b/hub/management/commands/generate_wildlife_trust_nature_reserves_csv.py index 98deb3616..9d2696ed1 100644 --- a/hub/management/commands/generate_wildlife_trust_nature_reserves_csv.py +++ b/hub/management/commands/generate_wildlife_trust_nature_reserves_csv.py @@ -1,36 +1,26 @@ import re -from time import sleep from django.conf import settings -from django.core.management.base import BaseCommand import pandas as pd import requests -from tqdm import tqdm -from hub.models import Area -from utils.mapit import ( - BadRequestException, - ForbiddenException, - InternalServerErrorException, - MapIt, - NotFoundException, - RateLimitException, -) +from .base_generators import BaseLatLonGeneratorCommand DATA_URL = "https://www.wildlifetrusts.org/jsonapi/node/reserve" POSTCODE_REGEX = r"[a-z]{1,2}\d[a-z\d]?\s*\d[a-z]{2}" -class Command(BaseCommand): +class Command(BaseLatLonGeneratorCommand): help = "Generate CSV file of wildlife trust nature reserves" - tqdm.pandas() + message = "Generating a CSV of areas for wildlife trust nature reserves" out_file = settings.BASE_DIR / "data" / "wildlife_trust_reserves.csv" - con_gss_codes = list( - set([value["gss"] for value in list(Area.objects.values("gss"))]) - ) + row_name = "title" + uses_gss = True + legacy_col = "gss" + uses_postcodes = True def get_dataframe(self): if not self._quiet: @@ -61,48 +51,10 @@ def get_dataframe(self): df = df.dropna(subset=["postcode"]) return df - def get_gss_code(self, mapit, postcode): - try: - gss_code = mapit.postcode_point_to_gss_codes(postcode) - except ( - NotFoundException, - BadRequestException, - InternalServerErrorException, - ForbiddenException, - ) as error: - print(f"Error fetching row postcode: {postcode} - {error} raised") - return None - except RateLimitException as error: - print(f"Mapit Error - {error}, waiting for a minute") - sleep(60) - return self.get_gss_code(mapit, postcode) - if gss_code: - for code in gss_code: - if code in self.con_gss_codes: - return code + def get_location_from_row(self, row): + return {"postcode": row["postcode"]} def process_data(self, df): - if not self._quiet: - self.stdout.write("Generating GSS codes from postcodes") - mapit = MapIt() - df["gss"] = df.postcode.apply(lambda pc: self.get_gss_code(mapit, pc)) + df = super().process_data(df) df = df.drop_duplicates(subset=["title", "gss"]) return df - - def save_data(self, df): - df.to_csv(self.out_file, index=False) - - def add_arguments(self, parser): - parser.add_argument( - "-q", "--quiet", action="store_true", help="Silence progress bars." 
- )
-        parser.add_argument(
-            "-i", "--ignore", action="store_true", help="Ignore existing data file"
-        )
-
-    def handle(self, quiet=False, ignore=False, *args, **options):
-        self._quiet = quiet
-        self._ignore = ignore
-        df = self.get_dataframe()
-        out_df = self.process_data(df)
-        self.save_data(out_df)

From 39723fa124a7ef596ce6594bbf3314177f093a31 Mon Sep 17 00:00:00 2001
From: Struan Donald
Date: Tue, 26 Mar 2024 16:25:53 +0000
Subject: [PATCH 24/38] update power postcodes importer to include councils

---
 .../import_aid_alliance_power_postcodes.py    | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/hub/management/commands/import_aid_alliance_power_postcodes.py b/hub/management/commands/import_aid_alliance_power_postcodes.py
index d9169a6ec..2b24f86d0 100644
--- a/hub/management/commands/import_aid_alliance_power_postcodes.py
+++ b/hub/management/commands/import_aid_alliance_power_postcodes.py
@@ -4,17 +4,30 @@
 from hub.models import Area, DataSet

-from .base_importers import BaseConstituencyGroupListImportCommand
+from .base_importers import (
+    BaseConstituencyGroupListImportCommand,
+    MultipleAreaTypesMixin,
+)

-class Command(BaseConstituencyGroupListImportCommand):
+class Command(MultipleAreaTypesMixin, BaseConstituencyGroupListImportCommand):
     help = "Import Aid Alliance 'Power Postcode' data"
+    do_not_convert = True
+
     message = "importing Aid Alliance 'power postcode' data"
     uses_gss = True
     cons_col = "gss"
     data_file = settings.BASE_DIR / "data" / "aid_alliance_power_postcodes.csv"

+    area_types = ["WMC", "WMC23", "STC", "DIS"]
+    cons_col_map = {
+        "WMC": "WMC",
+        "WMC23": "WMC23",
+        "STC": "STC",
+        "DIS": "DIS",
+    }
+
     power_postcodes = {
         "label": "Power Postcodes",
         "description": "Aid Alliance’s Power Postcodes are activist groups building a stronger connection between communities and MPs in key constituencies, on international development and the UK aid budget.",
@@ -84,6 +97,10 @@ def get_df(self):
             "contact",
             "url",
             "gss",
+            "WMC",
+            "WMC23",
+            "STC",
+            "DIS",
         ]
         # Add Areas to df
         df["constituency"] = df.gss.apply(self.add_area)

From 20cbbbedea6eb75f6e82da150101ba65e07a0be3 Mon Sep 17 00:00:00 2001
From: Struan Donald
Date: Tue, 26 Mar 2024 16:27:13 +0000
Subject: [PATCH 25/38] update foodbank importer to handle councils

---
 hub/management/commands/import_foodbank_count.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/hub/management/commands/import_foodbank_count.py b/hub/management/commands/import_foodbank_count.py
index bbc6cf0ba..2dc8ccd23 100644
--- a/hub/management/commands/import_foodbank_count.py
+++ b/hub/management/commands/import_foodbank_count.py
@@ -2,10 +2,10 @@
 from hub.models import AreaData, DataSet

-from .base_importers import BaseConstituencyCountImportCommand
+from .base_importers import BaseConstituencyCountImportCommand, MultipleAreaTypesMixin

-class Command(BaseConstituencyCountImportCommand):
+class Command(MultipleAreaTypesMixin, BaseConstituencyCountImportCommand):
     help = "Import data about number of foodbanks per constituency"

     message = "importing constituency foodbank count"
@@ -14,6 +14,14 @@ class Command(BaseConstituencyCountImportCommand):
     cons_col = "gss"
     data_file = settings.BASE_DIR / "data" / "foodbanks_per_constituency.csv"

+    area_types = ["WMC", "WMC23", "STC", "DIS"]
+    cons_col_map = {
+        "WMC": "WMC",
+        "WMC23": "WMC23",
+        "STC": "STC",
+        "DIS": "DIS",
+    }
+
     defaults = {
         "label": "Number of Trussell Trust foodbanks",
         "data_type": "integer",

From
c5d0d023a9c4863809e32c0699d6bb47352202c9 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Tue, 26 Mar 2024 16:27:53 +0000 Subject: [PATCH 26/38] update GBGW importers to handle councils --- .../commands/import_council_gbgw_events.py | 39 ------------------- hub/management/commands/import_gbgw_events.py | 13 +++++-- .../commands/import_gbgw_events_23.py | 13 +++++-- 3 files changed, 20 insertions(+), 45 deletions(-) delete mode 100644 hub/management/commands/import_council_gbgw_events.py diff --git a/hub/management/commands/import_council_gbgw_events.py b/hub/management/commands/import_council_gbgw_events.py deleted file mode 100644 index d17bb1172..000000000 --- a/hub/management/commands/import_council_gbgw_events.py +++ /dev/null @@ -1,39 +0,0 @@ -from django.conf import settings - -from hub.models import DataSet - -from .base_importers import BaseConstituencyCountImportCommand, MultipleAreaTypesMixin - - -class Command(MultipleAreaTypesMixin, BaseConstituencyCountImportCommand): - help = "Import data about number of GBGW events per council" - message = "Importing 2022 GBGW events" - uses_gss = False - - data_file = settings.BASE_DIR / "data" / "gbgw_events_processed.csv" - cons_col_map = { - "STC": "STC", - "DIS": "DIS", - } - area_types = ["STC", "DIS"] - - data_sets = { - "council_gbgw_2022_event_count": { - "defaults": { - "label": "Number of Great Big Green Week 2022 events", - "release_date": "October 2022", - "data_type": "integer", - "category": "movement", - "subcategory": "events", - "source_label": "Data from The Climate Coalition.", - "source": "https://greatbiggreenweek.com/", - "source_type": "google sheet", - "data_url": "", - "table": "areadata", - "default_value": 10, - "comparators": DataSet.numerical_comparators(), - "unit_type": "raw", - "unit_distribution": "people_in_area", - } - } - } diff --git a/hub/management/commands/import_gbgw_events.py b/hub/management/commands/import_gbgw_events.py index f5019c87a..f0a9ead0d 100644 --- a/hub/management/commands/import_gbgw_events.py +++ b/hub/management/commands/import_gbgw_events.py @@ -2,16 +2,23 @@ from hub.models import DataSet -from .base_importers import BaseConstituencyCountImportCommand +from .base_importers import BaseConstituencyCountImportCommand, MultipleAreaTypesMixin -class Command(BaseConstituencyCountImportCommand): +class Command(MultipleAreaTypesMixin, BaseConstituencyCountImportCommand): help = "Import data about number of GBGW events per constituency" message = "Importing 2022 GBGW events" uses_gss = False data_file = settings.BASE_DIR / "data" / "gbgw_events_processed.csv" - cons_col = "area" + + area_types = ["WMC", "WMC23", "STC", "DIS"] + cons_col_map = { + "WMC": "WMC", + "WMC23": "WMC23", + "STC": "STC", + "DIS": "DIS", + } data_sets = { "constituency_gbgw_2022_event_count": { diff --git a/hub/management/commands/import_gbgw_events_23.py b/hub/management/commands/import_gbgw_events_23.py index f22b7ccd0..4d60aa1b5 100644 --- a/hub/management/commands/import_gbgw_events_23.py +++ b/hub/management/commands/import_gbgw_events_23.py @@ -2,16 +2,23 @@ from hub.models import DataSet -from .base_importers import BaseConstituencyCountImportCommand +from .base_importers import BaseConstituencyCountImportCommand, MultipleAreaTypesMixin -class Command(BaseConstituencyCountImportCommand): +class Command(MultipleAreaTypesMixin, BaseConstituencyCountImportCommand): help = "Import data about number of GBGW events in 2023 per constituency" message = "Importing 2023 GBGW events" uses_gss = False data_file = 
settings.BASE_DIR / "data" / "gbgw_events_23_processed.csv" - cons_col = "area" + + area_types = ["WMC", "WMC23", "STC", "DIS"] + cons_col_map = { + "WMC": "WMC", + "WMC23": "WMC23", + "STC": "STC", + "DIS": "DIS", + } data_sets = { "constituency_gbgw_2023_event_count": { From 6341917a229c634509d591ffe754432da03871e6 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Tue, 26 Mar 2024 16:28:17 +0000 Subject: [PATCH 27/38] update NT property importer to handle councils --- .../commands/import_nt_property_locations.py | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/hub/management/commands/import_nt_property_locations.py b/hub/management/commands/import_nt_property_locations.py index fd4cb1542..9161dbe6e 100644 --- a/hub/management/commands/import_nt_property_locations.py +++ b/hub/management/commands/import_nt_property_locations.py @@ -5,13 +5,23 @@ from hub.models import Area, AreaData, DataSet -from .base_importers import BaseAreaImportCommand +from .base_importers import BaseAreaImportCommand, MultipleAreaTypesMixin -class Command(BaseAreaImportCommand): +class Command(MultipleAreaTypesMixin, BaseAreaImportCommand): help = "Import data about NT properties per constituency" data_file = settings.BASE_DIR / "data" / "national_trust_properties.csv" source_url = "https://www.nationaltrust.org.uk/search" + do_not_convert = True + + area_types = ["WMC", "WMC23", "STC", "DIS"] + cons_col_map = { + "WMC": "WMC", + "WMC23": "WMC23", + "STC": "STC", + "DIS": "DIS", + } + defaults = { "label": "National Trust properties", "data_type": "json", @@ -62,14 +72,6 @@ def delete_data(self): for data_type in self.data_types.values(): AreaData.objects.filter(data_type=data_type).delete() - def handle(self, quiet=False, *args, **kwargs): - self._quiet = quiet - self.add_data_sets() - self.delete_data() - self.process_data() - self.update_averages() - self.update_max_min() - def process_data(self): df = pd.read_csv(self.data_file) # df.group_name = df.group_name.apply( @@ -81,7 +83,7 @@ def process_data(self): # Group by the area, and add the data from there area_type = self.get_area_type() - for area_name, data in tqdm(df.groupby("area")): + for area_name, data in tqdm(df.groupby(self.cons_col_map[area_type.code])): try: area = Area.objects.get(name=area_name, area_type=area_type) except Area.DoesNotExist: From 5ce9d6430473b7cdebbf2e96e76af1f02ece1c92 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Tue, 26 Mar 2024 16:28:54 +0000 Subject: [PATCH 28/38] update windfarm importer to handle councils --- hub/management/commands/import_onshore_windfarms.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/hub/management/commands/import_onshore_windfarms.py b/hub/management/commands/import_onshore_windfarms.py index 3880b2c13..0e08eb377 100644 --- a/hub/management/commands/import_onshore_windfarms.py +++ b/hub/management/commands/import_onshore_windfarms.py @@ -4,10 +4,10 @@ from hub.models import AreaData, DataSet -from .base_importers import BaseConstituencyCountImportCommand +from .base_importers import BaseConstituencyCountImportCommand, MultipleAreaTypesMixin -class Command(BaseConstituencyCountImportCommand): +class Command(MultipleAreaTypesMixin, BaseConstituencyCountImportCommand): help = "Import data about number of onshort windfarms of per constituency" message = "importing consituency windfarm count" @@ -15,6 +15,14 @@ class Command(BaseConstituencyCountImportCommand): cons_col = "gss" data_file = settings.BASE_DIR / "data" / 
"windfarms_per_constituency.csv" + area_types = ["WMC", "WMC23", "STC", "DIS"] + cons_col_map = { + "WMC": "WMC", + "WMC23": "WMC23", + "STC": "STC", + "DIS": "DIS", + } + defaults = { "label": "Number of onshore windfarms", "data_type": "integer", From c39df7a0101b1e39814ec9fca7b1ae252618e2c2 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Tue, 26 Mar 2024 16:29:20 +0000 Subject: [PATCH 29/38] update RSPB importer to handle councils --- .../commands/import_rspb_nature_reserves.py | 68 +++++++------------ 1 file changed, 23 insertions(+), 45 deletions(-) diff --git a/hub/management/commands/import_rspb_nature_reserves.py b/hub/management/commands/import_rspb_nature_reserves.py index ed28c4633..8c063db2b 100644 --- a/hub/management/commands/import_rspb_nature_reserves.py +++ b/hub/management/commands/import_rspb_nature_reserves.py @@ -1,16 +1,29 @@ from django.conf import settings import pandas as pd -from tqdm import tqdm -from hub.models import Area, AreaData, AreaType, DataSet +from hub.models import DataSet -from .base_importers import BaseAreaImportCommand +from .base_importers import ( + BaseConstituencyGroupListImportCommand, + MultipleAreaTypesMixin, +) -class Command(BaseAreaImportCommand): +class Command(MultipleAreaTypesMixin, BaseConstituencyGroupListImportCommand): help = "Import data about RSPB reserves in each constituency" data_file = settings.BASE_DIR / "data" / "rspb_reserves.csv" + message = "Importing RSPB reserves data" + + uses_gss = True + area_types = ["WMC", "WMC23", "STC", "DIS"] + cons_col_map = { + "WMC": "WMC", + "WMC23": "WMC23", + "STC": "STC", + "DIS": "DIS", + } + defaults = { "label": "RSPB Reserves", "data_type": "json", @@ -53,46 +66,11 @@ class Command(BaseAreaImportCommand): }, } - def handle(self, quiet=False, *args, **kwargs): - self._quiet = quiet - self.add_data_sets() - self.delete_data() - self.process_data() - self.update_max_min() - - def process_data(self): - df = pd.read_csv(self.data_file) - - if not self._quiet: - self.stdout.write("Importing rspb reserves data") - - # Group by the area, and add the data from there - for gss_list, data in tqdm(df.groupby("gss")): - for gss in gss_list.split(","): - try: - area = Area.objects.filter( - area_type=AreaType.objects.get(code="WMC") - ).get(gss=gss) - except Area.DoesNotExist: - continue - - json = [] - for index, row in data.iterrows(): - json.append({"group_name": row["name"]}) - - json_data, created = AreaData.objects.update_or_create( - data_type=self.data_types["rspb_reserves"], - area=area, - json=json, - ) + group_data_type = "rspb_reserves" + count_data_type = "rspb_reserves_count" - count_data, created = AreaData.objects.update_or_create( - data_type=self.data_types["rspb_reserves_count"], - area=area, - data=len(data), - ) + def get_df(self): + return pd.read_csv(self.data_file) - def add_arguments(self, parser): - parser.add_argument( - "-q", "--quiet", action="store_true", help="Silence progress bars." 
- ) + def get_group_json(self, row): + return {"group_name": row["name"]} From 1f0986c3d9f8b7c26706b8a3fc7b95467548a5b3 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Tue, 26 Mar 2024 16:29:47 +0000 Subject: [PATCH 30/38] update save the children importer to handle councils --- .../commands/import_save_the_children_shop_count.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/hub/management/commands/import_save_the_children_shop_count.py b/hub/management/commands/import_save_the_children_shop_count.py index 9b0ffb1e5..823393846 100644 --- a/hub/management/commands/import_save_the_children_shop_count.py +++ b/hub/management/commands/import_save_the_children_shop_count.py @@ -2,17 +2,23 @@ from hub.models import AreaData, DataSet -from .base_importers import BaseConstituencyCountImportCommand +from .base_importers import BaseConstituencyCountImportCommand, MultipleAreaTypesMixin -class Command(BaseConstituencyCountImportCommand): +class Command(MultipleAreaTypesMixin, BaseConstituencyCountImportCommand): help = "Import data about the number of Save the Children shops per constituency" message = "importing Save the Children shop count" uses_gss = True - cons_col = "gss" data_file = settings.BASE_DIR / "data" / "save_the_children_shops_processed.csv" + area_types = ["WMC", "WMC23", "STC", "DIS"] + cons_col_map = { + "WMC": "WMC", + "WMC23": "WMC23", + "STC": "STC", + "DIS": "DIS", + } defaults = { "label": "Number of Save the Children shops", "data_type": "integer", From e10c744beead8636797c3c30b364bf2b1e97592d Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Tue, 26 Mar 2024 16:30:12 +0000 Subject: [PATCH 31/38] update wildlife trust reserve importer to handle councils --- .../import_wildlife_trust_reserves.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/hub/management/commands/import_wildlife_trust_reserves.py b/hub/management/commands/import_wildlife_trust_reserves.py index 13e22d8e3..2f52ec110 100644 --- a/hub/management/commands/import_wildlife_trust_reserves.py +++ b/hub/management/commands/import_wildlife_trust_reserves.py @@ -6,10 +6,13 @@ from hub.models import DataSet -from .base_importers import BaseConstituencyGroupListImportCommand +from .base_importers import ( + BaseConstituencyGroupListImportCommand, + MultipleAreaTypesMixin, +) -class Command(BaseConstituencyGroupListImportCommand): +class Command(MultipleAreaTypesMixin, BaseConstituencyGroupListImportCommand): help = "Import data about wildlife trust reserves in each constituency" message = "Importing wildlife trusts reserves data" @@ -62,11 +65,21 @@ class Command(BaseConstituencyGroupListImportCommand): group_data_type = "wildlife_trusts_reserves" count_data_type = "wildlife_trusts_reserves_count" - use_gss = True + + uses_gss = True + area_types = ["WMC", "WMC23", "STC", "DIS"] + cons_col_map = { + "WMC": "WMC", + "WMC23": "WMC23", + "STC": "STC", + "DIS": "DIS", + } def get_df(self): return pd.read_csv( - self.data_file, names=["group_name", "trust", "url", "postcode", "gss"] + self.data_file, + names=["group_name", "trust", "url", "postcode", "gss", *self.area_types], + header=0, ) def get_group_json(self, row): From 68c78de2679a8c0777fd5b0c10faaed3967f5c6f Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Tue, 26 Mar 2024 16:30:33 +0000 Subject: [PATCH 32/38] update WI group importer to handle councils --- .../commands/import_wi_group_locations.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git 
a/hub/management/commands/import_wi_group_locations.py b/hub/management/commands/import_wi_group_locations.py index b241d4825..7d397cef8 100644 --- a/hub/management/commands/import_wi_group_locations.py +++ b/hub/management/commands/import_wi_group_locations.py @@ -6,15 +6,27 @@ from hub.models import DataSet -from .base_importers import BaseConstituencyGroupListImportCommand +from .base_importers import ( + BaseConstituencyGroupListImportCommand, + MultipleAreaTypesMixin, +) -class Command(BaseConstituencyGroupListImportCommand): +class Command(MultipleAreaTypesMixin, BaseConstituencyGroupListImportCommand): help = "Import data about WI groups per constituency" message = "Importing Women's Institute group data" data_file = settings.BASE_DIR / "data" / "wi_groups.csv" source_url = "https://www.thewi.org.uk/wis-a-z" + + uses_gss = True + area_types = ["WMC", "WMC23", "STC", "DIS"] + cons_col_map = { + "WMC": "WMC", + "WMC23": "WMC23", + "STC": "STC", + "DIS": "DIS", + } defaults = { "label": "Women’s Institute groups", "data_type": "json", @@ -70,7 +82,7 @@ def get_df(self): df.group_name = df.group_name.apply( lambda x: x.split(" | ")[0] if isinstance(x, str) else x ) - df.columns = ["group_name", "url", "lat_lon", "constituency"] + df.columns = ["group_name", "url", "lat_lon", "constituency", *self.area_types] return df def get_group_json(self, row): From 83eaaf66ce02e8eff12efe624846e07bd2812ea8 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Wed, 27 Mar 2024 12:47:27 +0000 Subject: [PATCH 33/38] import council polling data from Onward and RenewableUK --- .../commands/import_council_mrp_data.py | 159 ++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 hub/management/commands/import_council_mrp_data.py diff --git a/hub/management/commands/import_council_mrp_data.py b/hub/management/commands/import_council_mrp_data.py new file mode 100644 index 000000000..c4c574fe3 --- /dev/null +++ b/hub/management/commands/import_council_mrp_data.py @@ -0,0 +1,159 @@ +from mysoc_dataset import get_dataset_df + +from hub.import_utils import add_gss_codes, filter_authority_type +from hub.models import DataSet + +from .base_importers import BaseImportFromDataFrameCommand, MultipleAreaTypesMixin + + +class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): + message = "Importing council MRP data" + uses_gss = True + do_not_convert = True + cons_row = "gss_code" + + area_types = ["STC", "DIS"] + + defaults = { + "data_type": "integer", + "category": "opinion", + "source_label": "Public First MRP Polling, commissioned by Onward UK, adjusted for the demographics of the local area by mySociety.", + "release_date": "July 2022", + "source": "https://www.publicfirst.co.uk/new-public-first-polling-for-onward.html", + "source_type": "google sheet", + "subcategory": "net_zero_support", + "table": "areadata", + "default_value": 10, + "data_url": "", + "exclude_countries": ["Northern Ireland"], + "comparators": DataSet.numerical_comparators(), + "unit_type": "percentage", + "unit_distribution": "people_in_area", + } + + survation_defaults = { + "data_type": "percent", + "category": "opinion", + "source_label": "Survation MRP polling, commissioned by RenewableUK, adjusted for the demographics of the local area by mySociety.", + "release_date": "September 2022", + "source": "https://www.renewableuk.com/news/615931/Polling-in-every-constituency-in-Britain-shows-strong-support-for-wind-farms-to-drive-down-bills.htm", + "source_type": "google sheet", + "table": "areadata", + 
"exclude_countries": ["Northern Ireland"], + "default_value": 50, + "comparators": DataSet.numerical_comparators(), + "unit_type": "percentage", + "unit_distribution": "people_in_area", + } + + data_sets = { + "constituency_nz_support": { + "defaults": { + **defaults, + "label": "Support Net Zero", + }, + "col": "Q02_Support", + }, + "constituency_nz_neutral": { + "defaults": { + **defaults, + "label": "Neither support nor oppose Net Zero", + }, + "col": "Q02_Neutral", + }, + "constituency_nz_oppose": { + "defaults": {**defaults, "label": "Oppose Net Zero"}, + "col": "Q02_Oppose", + }, + "constituency_cc_high": { + "defaults": { + **defaults, + "label": "Consider climate change a high priority", + }, + "col": "Q07_High", + }, + "support-offshore-wind": { + "defaults": { + **survation_defaults, + "label": "Support offshore wind", + "subcategory": "renewable_energy", + "order": 1, + }, + "col": "Q4.1", + }, + "support-onshore-wind": { + "defaults": { + **survation_defaults, + "label": "Support onshore wind", + "subcategory": None, + "order": 2, + }, + "col": "Q4.2", + }, + "support-solar": { + "defaults": { + **survation_defaults, + "label": "Support solar power", + "subcategory": "renewable_energy", + "order": 3, + }, + "col": "Q4.3", + }, + "support-tidal": { + "defaults": { + **survation_defaults, + "label": "Support tidal energy", + "subcategory": "renewable_energy", + "order": 4, + }, + "col": "Q4.4", + }, + "support-wave": { + "defaults": { + **survation_defaults, + "label": "Support wave energy", + "subcategory": None, + "order": 5, + }, + "col": "Q4.5", + }, + "support-nuclear": { + "defaults": { + **survation_defaults, + "label": "Support nuclear energy", + "subcategory": "renewable_energy", + "order": 6, + }, + "col": "Q4.6", + }, + "support-local-renewable": { + "defaults": { + **survation_defaults, + "label": "Support renewable energy projects in their local area", + "subcategory": None, + "order": 7, + }, + "col": "Q5", + }, + } + del data_sets["constituency_cc_high"]["defaults"]["subcategory"] + + def get_row_data(self, row, conf): + return row[conf["col"]] * 100 + + def get_dataframe(self): + df = get_dataset_df( + repo_name="climate_mrp_polling", + package_name="local_authority_climate_polling", + version_name="latest", + file_name="local_authority_climate_polling.csv", + done_survey=True, + ) + + df = add_gss_codes(df, "local-authority-code") + df = filter_authority_type(df, self.area_type, self.cons_row) + df = df.pivot( + index="gss_code", columns="question", values="percentage" + ).reset_index() + + return df From 6f701790076b60173259dbf85dd770cccb876dda Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Tue, 2 Apr 2024 16:31:41 +0100 Subject: [PATCH 34/38] import council level IMD data --- .../commands/import_council_imd_data.py | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 hub/management/commands/import_council_imd_data.py diff --git a/hub/management/commands/import_council_imd_data.py b/hub/management/commands/import_council_imd_data.py new file mode 100644 index 000000000..9e849d576 --- /dev/null +++ b/hub/management/commands/import_council_imd_data.py @@ -0,0 +1,68 @@ +import pandas as pd +from mysoc_dataset import get_dataset_url + +from hub.import_utils import add_gss_codes, filter_authority_type +from hub.models import DataSet + +from .base_importers import BaseImportFromDataFrameCommand, MultipleAreaTypesMixin + + +class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): + help = "Import Council IMD data" + + message = 
"Importing council IMD data" + cons_row = "gss_code" + area_types = ["STC", "DIS"] + uses_gss = True + do_not_convert = True + + data_sets = { + "constituency_imd": { + "defaults": { + "source": "https://mysociety.github.io/composite_uk_imd/", + "source_label": "Data from ONS (England and Wales), NRS (Scotland), and NISRA (Northern Ireland), collated and standardised by mySociety.", + "name": "constituency_imd", + "description": "Deciles of deprivation, from 1 (most deprived) to 10 (least deprived). This uses a composite measure of deprivation (including income, employment, education, skills, health, crime, and housing) standardised across the countries of the UK.", + "label": "Index of Multiple Deprivation (IMD)", + "data_type": "integer", + "category": "place", + "source_type": "csv", + "table": "areadata", + "comparators": DataSet.numerical_comparators()[::-1], + "default_value": 5, + "unit_type": "percentage", + "unit_distribution": "people_in_area", + "is_public": True, + }, + "col": "la-imd-pop-quintile", + } + } + package = { + "repo_name": "composite_uk_imd", + "package_name": "uk_index", + "version_name": "3.3.0", + "file_name": "la_imd.csv", + } + + def __init__(self): + super().__init__() + + url = get_dataset_url( + repo_name=self.package["repo_name"], + package_name=self.package["package_name"], + version_name=self.package["version_name"], + file_name=self.package["file_name"], + done_survey=True, + ) + + self.data_sets["constituency_imd"]["defaults"]["data_url"] = url + + def get_dataframe(self): + df = pd.read_csv(self.data_sets["constituency_imd"]["defaults"]["data_url"]) + + df = add_gss_codes(df, "local-authority-code") + df = filter_authority_type(df, self.area_type, self.cons_row) + return df + + def update_averages(self): + pass From c3131a9d17572337793ae00435467c8b03d0d516 Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Tue, 9 Apr 2024 14:16:13 +0100 Subject: [PATCH 35/38] generate/import tearfund church data for all areas --- .../generate_tearfund_churches_csv.py | 72 +++---------------- .../commands/import_tearfund_churches.py | 12 +++- 2 files changed, 19 insertions(+), 65 deletions(-) diff --git a/hub/management/commands/generate_tearfund_churches_csv.py b/hub/management/commands/generate_tearfund_churches_csv.py index c54890a2e..86f0eb9fd 100644 --- a/hub/management/commands/generate_tearfund_churches_csv.py +++ b/hub/management/commands/generate_tearfund_churches_csv.py @@ -1,56 +1,30 @@ import re -from time import sleep from django.conf import settings -from django.core.management.base import BaseCommand import pandas as pd -from tqdm import tqdm -from utils.mapit import ( - BadRequestException, - ForbiddenException, - InternalServerErrorException, - MapIt, - NotFoundException, - RateLimitException, -) +from .base_generators import BaseLatLonGeneratorCommand POSTCODE_REGEX = r"[a-z]{1,2}\d[a-z\d]?\s*\d[a-z]{2}" -class Command(BaseCommand): +class Command(BaseLatLonGeneratorCommand): help = "Generate CSV file of churches which have declared a climate emergency" + message = "Generating a CSV of churches that have declared a climate emergency" data_file = settings.BASE_DIR / "data" / "tearfund_churches.csv" out_file = settings.BASE_DIR / "data" / "tearfund_churches_processed.csv" + row_name = "church" + uses_gss = True + uses_postcodes = True + def get_dataframe(self): df = pd.read_csv(self.data_file, usecols=["Church / Organisation", "Address"]) # Remove first row, which just has the number of rows df = df.iloc[1:, :] df.columns = ["church", "address"] - 
return df - - def get_gss_code(self, mapit, postcode): - try: - gss_code = mapit.postcode_point_to_gss_codes(postcode) - except ( - NotFoundException, - BadRequestException, - InternalServerErrorException, - ForbiddenException, - ) as error: - print(f"Error fetching row postcode: {postcode} - {error} raised") - return None - except RateLimitException as error: - print(f"Mapit Error - {error}, waiting for a minute") - sleep(60) - return self.get_gss_code(mapit, postcode) - return ",".join(gss_code) - - def get_all_gss_codes_from_mapit(self, df): - df = df.copy() # Get postcodes from church and address cols df["church_regexed_postcode"] = df.church.str.findall( POSTCODE_REGEX, flags=re.IGNORECASE @@ -63,35 +37,7 @@ def get_all_gss_codes_from_mapit(self, df): df["postcode"] = df.address_regexed_postcode.combine_first( df.church_regexed_postcode ) - - gss_codes = [] - mapit = MapIt() - for postcode in tqdm(df.postcode): - gss_code = None - if postcode: - gss_code = self.get_gss_code(mapit, postcode) - gss_codes.append(gss_code) - - return pd.Series(gss_codes, name="gss") - - def process_data(self, df): - if not self._quiet: - self.stdout.write("Generating churches per constituency data") - - gss_codes = self.get_all_gss_codes_from_mapit(df) - df = pd.concat([df, gss_codes], axis=1) return df - def save_data(self, df): - df.to_csv(self.out_file) - - def add_arguments(self, parser): - parser.add_argument( - "-q", "--quiet", action="store_true", help="Silence progress bars." - ) - - def handle(self, quiet=False, *args, **options): - self._quiet = quiet - df = self.get_dataframe() - out_df = self.process_data(df) - self.save_data(out_df) + def get_location_from_row(self, row): + return {"postcode": row.postcode} diff --git a/hub/management/commands/import_tearfund_churches.py b/hub/management/commands/import_tearfund_churches.py index dd94d7a8f..7021ba875 100644 --- a/hub/management/commands/import_tearfund_churches.py +++ b/hub/management/commands/import_tearfund_churches.py @@ -2,10 +2,10 @@ from hub.models import AreaData, DataSet -from .base_importers import BaseConstituencyCountImportCommand +from .base_importers import BaseConstituencyCountImportCommand, MultipleAreaTypesMixin -class Command(BaseConstituencyCountImportCommand): +class Command(MultipleAreaTypesMixin, BaseConstituencyCountImportCommand): help = "Import data about number of churches that have declared a climate emergency" message = "importing churches with a declared climate emergency count" @@ -13,6 +13,14 @@ class Command(BaseConstituencyCountImportCommand): cons_col = "gss" data_file = settings.BASE_DIR / "data" / "tearfund_churches_processed.csv" + area_types = ["WMC", "WMC23", "STC", "DIS"] + cons_col_map = { + "WMC": "WMC", + "WMC23": "WMC23", + "STC": "STC", + "DIS": "DIS", + } + defaults = { "label": "Churches that have declared a climate emergency", "data_type": "integer", From 28f3abf169b65840d693495b5a8394ce7efc929a Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Tue, 9 Apr 2024 14:21:20 +0100 Subject: [PATCH 36/38] update dataset list generator to add council area types --- .../generate_csv_of_available_datasets.py | 38 +++---------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/hub/management/commands/generate_csv_of_available_datasets.py b/hub/management/commands/generate_csv_of_available_datasets.py index e21ef0e42..d5df3f552 100644 --- a/hub/management/commands/generate_csv_of_available_datasets.py +++ b/hub/management/commands/generate_csv_of_available_datasets.py @@ -4,7 +4,7 @@ import 
pandas as pd from tqdm import tqdm -from hub.models import Area, DataSet, Person +from hub.models import DataSet class Command(BaseCommand): @@ -22,38 +22,6 @@ def handle(self, quiet=False, *args, **options): df = self.build_dataframe() df.to_csv(self.out_file) - def get_area_data(self): - area_details = [] - for area in Area.objects.filter(area_type__code="WMC"): - try: - mp = Person.objects.get(area=area) - except Person.DoesNotExist: - print(f"Person does not exist for area {area.gss} {area.name}") - area_details.append([area.gss, area.name, area.mapit_id, mp.name]) - return pd.DataFrame( - area_details, - columns=["Area GSS code", "Area name", "Area MapIt ID", "MP name"], - ).set_index("Area GSS code") - - def create_dataset_df(self, data, label, table): - df_data = [] - for datum in data: - if table == "areadata": - area = datum.area - else: - area = datum.person.area - df_data.append([area.gss, datum.value()]) - df = pd.DataFrame(df_data, columns=["Area GSS code", label]) - # Deal with any multiples, by concatenating them into one string - df = df.groupby("Area GSS code").agg( - { - "Area GSS code": "first", - label: lambda data_list: ", ".join([str(x) for x in data_list]), - } - ) - df = df.set_index("Area GSS code") - return df - def build_dataframe(self): # Next, iterate through each (filterable) data set in the db datasets = [] @@ -72,6 +40,8 @@ def build_dataframe(self): data_set.is_public, "WMC" in areas_available, "WMC23" in areas_available, + "STC" in areas_available, + "DIS" in areas_available, ] ) @@ -85,6 +55,8 @@ def build_dataframe(self): "Public", "2010 Cons", "2024 Cons", + "Single Tier Councils", + "District Councils", ], ) return df From 57bb97dce25c133caf2c26abaa994150f598060d Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Wed, 17 Apr 2024 14:35:22 +0100 Subject: [PATCH 37/38] local council action scorecards import --- .../import_council_scorecards_score.py | 167 ++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 hub/management/commands/import_council_scorecards_score.py diff --git a/hub/management/commands/import_council_scorecards_score.py b/hub/management/commands/import_council_scorecards_score.py new file mode 100644 index 000000000..08fe89d8e --- /dev/null +++ b/hub/management/commands/import_council_scorecards_score.py @@ -0,0 +1,167 @@ +from django.conf import settings + +import pandas as pd +from tqdm import tqdm + +from hub.import_utils import filter_authority_type +from hub.models import DataSet, DataType + +from .base_importers import BaseImportFromDataFrameCommand, MultipleAreaTypesMixin + +declare_map = { + "Y": "Yes", + "N": "No", +} + + +class Command(MultipleAreaTypesMixin, BaseImportFromDataFrameCommand): + cons_row = "gss_code" + message = "Importing council scorecards data" + uses_gss = True + do_not_convert = True + + data_file = settings.BASE_DIR / "data" / "2023_scorecards_data.csv" + + area_types = ["STC", "DIS"] + + defaults = { + "label": "Council Climate Action Scorecard", + "description": "", + "data_type": "percent", + "category": "place", + "release_date": "2023", + "source_label": "Data from Climate Emergency UK.", + "source": "https://councilclimatescorecards.uk/", + "source_type": "csv", + "table": "areadata", + "data_url": "", + "comparators": DataSet.numerical_comparators(), + "is_filterable": True, + "is_shadable": True, + "is_public": True, + "unit_type": "raw", + "unit_distribution": "physical_area", + } + + data_sets = { + "council_action_scorecard_total": { + "defaults": defaults, + "col": 
"weighted_total", + }, + "council_action_scorecard_bh": { + "defaults": { + **defaults, + "label": "Buildings & Heating", + }, + "col": "Buildings & Heating", + }, + "council_action_scorecard_transport": { + "defaults": { + **defaults, + "label": "Transport", + }, + "col": "Transport", + }, + "council_action_scorecard_planning": { + "defaults": { + **defaults, + "label": "Planning & Land Use", + }, + "col": "Planning & Land Use", + }, + "council_action_scorecard_governance": { + "defaults": { + **defaults, + "label": "Goverance & Finance", + }, + "col": "Governance & Finance", + }, + "council_action_scorecard_biodiversity": { + "defaults": { + **defaults, + "label": "Biodiversity", + }, + "col": "Biodiversity", + }, + "council_action_scorecard_collaboration": { + "defaults": { + **defaults, + "label": "Collaboration & Engagement", + }, + "col": "Collaboration & Engagement", + }, + "council_action_scorecard_waste": { + "defaults": { + **defaults, + "label": "Waste Reduction & Food", + }, + "col": "Waste Reduction & Food", + }, + } + + # do not want to calculate averages as the comparisons are only relevant + # to councils of the same type + def update_averages(self): + pass + + def add_data_sets(self, df): + if not self._quiet: + self.stdout.write("Creating dataset + types") + + total_data_set, created = DataSet.objects.update_or_create( + name="council_action_scorecard_total", defaults=self.defaults + ) + + section_data_set, created = DataSet.objects.update_or_create( + name="council_action_scorecard_sections", + defaults={ + **self.defaults, + "is_range": True, + "label": "Action Scorecards section scores", + }, + ) + + total_data_set.areas_available.add(self.get_area_type()) + section_data_set.areas_available.add(self.get_area_type()) + + for name in tqdm(self.data_sets.keys()): + if name == "council_action_scorecard_total": + data_set = total_data_set + else: + data_set = section_data_set + + data_type, created = DataType.objects.update_or_create( + data_set=data_set, + name=name, + area_type=self.get_area_type(), + defaults={ + "data_type": "percent", + "label": self.data_sets[name]["defaults"]["label"], + }, + ) + self.data_types[name] = data_type + + def get_dataframe(self): + df = pd.read_csv(self.data_file) + df = filter_authority_type(df, self.area_type, "gss") + + councils = [] + for index, row in df.iterrows(): + councils.append( + { + "gss_code": row["gss"], + "weighted_total": row["weighted_total"] * 100, + "Buildings & Heating": row["Buildings & Heating"] * 100, + "Transport": row["Transport"] * 100, + "Governance & Finance": row["Governance & Finance"] * 100, + "Biodiversity": row["Biodiversity"] * 100, + "Planning & Land Use": row["Planning & Land Use"] * 100, + "Waste Reduction & Food": row["Waste Reduction & Food"] * 100, + "Collaboration & Engagement": row["Collaboration & Engagement"] + * 100, + } + ) + + df = pd.DataFrame(councils) + + return df From 3ef66ef24ba00ce71c5571474289e6c77c9836ee Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Wed, 17 Apr 2024 14:35:40 +0100 Subject: [PATCH 38/38] display changes to support local council scorecards move range display into include allow a range related dataset for datasets --- hub/templates/hub/area.html | 34 +++++++++--------------- hub/templates/hub/area/_range_data.html | 35 +++++++++++++++++++++++++ hub/views/area.py | 4 +++ 3 files changed, 52 insertions(+), 21 deletions(-) create mode 100644 hub/templates/hub/area/_range_data.html diff --git a/hub/templates/hub/area.html b/hub/templates/hub/area.html index 
index de202b37d..aaa84405e 100644
--- a/hub/templates/hub/area.html
+++ b/hub/templates/hub/area.html
@@ -475,6 +475,8 @@

    Place

 {% for dataset in categories.place %}
 {% if dataset.is_range and dataset.data|length > 9 %}
+{% elif dataset.related_category and dataset.related_category.is_range %}
+
    {% elif dataset.is_range or dataset.data_type == "json" %}
 {% else %}
@@ -502,7 +504,7 @@

    {{ dataset.label }}

    {% else %}

    {{ dataset.data.value|intcomma }}

 {% endif %}
-{% if dataset.data.average %}
+{% if dataset.data.average is not None %}

    {{dataset.data.average|floatformat:"0" }} national average

 {% else %}
@@ -513,34 +515,24 @@

    {{ dataset.label }}

 {% include 'hub/area/_json_data.html' with dataset=dataset.related_category %}
 {% endif %}
 {% elif dataset.is_range and dataset.data|length > 0 %}
-<table>
-  <thead>
-    <tr>
-      <th></th>
-      <th>This area</th>
-      <th>National average</th>
-    </tr>
-  </thead>
-  {% for row in dataset.data %}
-  <tr>
-    <th>{{ row.label|html_format_dataset_name|safe }}</th>
-    <td>{{ row.value|floatformat }}%</td>
-    <td>{{ row.average|floatformat }}%</td>
-  </tr>
-  {% endfor %}
-</table>
+{% include 'hub/area/_range_data.html' with dataset=dataset %}
 {% else %}

    {{ dataset.data.value|floatformat }}%

+{% if dataset.data.average is not None %}

    {{dataset.data.average|floatformat }}% national average

+{% endif %}
+{% if dataset.related_category and dataset.related_category.is_range %}
+{% include 'hub/area/_range_data.html' with dataset=dataset.related_category %}
+{% endif %}
 {% endif %}
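
Every importer patched in this series mixes in MultipleAreaTypesMixin from hub/management/commands/base_importers.py, which predates the series and so never appears in these diffs. The sketch below is a rough reconstruction of the contract those commands rely on, not the real implementation: only the area_types and cons_col_map attribute names are taken from the patches above, everything else is an assumption.

class MultipleAreaTypesMixin:
    # Sketch only: run the wrapped import command once per configured area
    # type, so a single command covers Westminster constituencies (old and
    # new boundaries) as well as single tier and district councils.
    def handle(self, *args, **options):
        for area_type in self.area_types:
            self.area_type = area_type  # e.g. "WMC", "WMC23", "STC" or "DIS"
            # Subclasses resolve the column that identifies the area for the
            # current area type via self.cons_col_map[self.area_type].
            super().handle(*args, **options)

With a loop like this in the shared base, commands such as import_foodbank_count above only need to declare their data file, column map and dataset defaults to gain council-level coverage.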