# Provenance (from the original patch header):
#   commit:  e6261c826d750e0e3032c5343614e4a41e15ff55
#   author:  Alexander Griffen
#   date:    Thu, 21 Dec 2023 16:23:39 +0000
#   subject: [PATCH] DATASET: RSPB nature reserves
#   path:    hub/management/commands/import_rspb_nature_reserves.py
"""Management command that imports RSPB reserve locations from a CSV file.

Each CSV row supplies one coordinate pair, which is handed to
``process_lat_long`` (inherited from ``BaseLatLongImportCommand``).
"""
from time import sleep

from django.conf import settings

import pandas as pd
from tqdm import tqdm

from hub.models import Area, AreaData, DataSet, DataType

from .base_importers import BaseLatLongImportCommand

# Maps the coordinate column names used in the CSV to the names the rest of
# this command reads.  Renaming by name (rather than by position) keeps the
# mapping correct whatever order the columns appear in inside the file.
_COORD_COLUMNS = {"xcoord": "Longitude", "ycoord": "Latitude"}


class Command(BaseLatLongImportCommand):
    """Import the RSPB reserves CSV and record each reserve's location."""

    help = "Import number of RSPB reserves by constituency"

    data_file = settings.BASE_DIR / "data" / "rspb_reserves_centroids.csv"
    message = "Importing RSPB reserves data"
    uses_gss = False

    defaults = {}

    data_sets = {
        "": {
            "defaults": defaults,
        },
    }

    def add_data_sets(self):
        """Create or update the ``rspb_nature_reserves`` DataSet and DataType.

        Stores the resulting DataType on ``self.data_type`` and registers it
        in ``self.data_types`` under its name.
        """
        data_set, created = DataSet.objects.update_or_create(
            name="rspb_nature_reserves",
            defaults={
                # The original literal listed "label" twice with the same
                # value; the duplicate key has been removed.
                "label": "RSPB Reserves",
                "data_type": "integer",
                "category": "place",
                "description": "Number of RSPB reserves by constituency.",
                "source_label": "Data from the RSPB.",
                "release_date": "September 2023",
                "source": "https://www.rspb.org.uk",
                "source_type": "csv",
                "table": "areadata",
                "default_value": {},
                "data_url": "",
                "is_filterable": False,
                "comparators": DataSet.comparators_default,
            },
        )

        data_type, created = DataType.objects.update_or_create(
            data_set=data_set,
            name="rspb_nature_reserves",
            defaults={
                "data_type": "integer",
                "label": "RSPB Reserves",
                "description": "Number of RSPB reserves by constituency.",
            },
        )

        self.data_type = data_type
        self.data_types[data_type.name] = data_type

    def get_dataframe(self):
        """Read the reserves CSV and return its coordinates.

        Returns:
            A DataFrame with exactly two columns, ``Longitude`` and
            ``Latitude`` (in that order), taken from the CSV's ``xcoord``
            and ``ycoord`` columns respectively.
        """
        df = pd.read_csv(self.data_file, usecols=list(_COORD_COLUMNS))
        # ``usecols`` does not control column order (pandas keeps the file's
        # order), so rename by name instead of assigning ``df.columns``
        # positionally; otherwise a CSV with ``ycoord`` first would silently
        # swap latitude and longitude.
        df = df.rename(columns=_COORD_COLUMNS)
        return df[["Longitude", "Latitude"]]

    def process_data(self, df):
        """Pass every row's coordinates to ``process_lat_long``.

        Args:
            df: DataFrame with ``Latitude`` and ``Longitude`` columns, as
                returned by ``get_dataframe``.  If ``None``, the CSV is read
                here instead.
        """
        # Use the frame supplied by the caller instead of re-reading the CSV
        # a second time; only fall back to the file when nothing was passed.
        if df is None:
            df = self.get_dataframe()

        if not self._quiet:
            self.stdout.write("Importing RSPB reserve count data")

        # ``iterrows`` is a generator, so tqdm needs ``total`` to be able to
        # display progress.
        for index, row in tqdm(df.iterrows(), total=len(df), disable=self._quiet):
            lat = row["Latitude"]
            lon = row["Longitude"]

            self.process_lat_long(lat=lat, lon=lon, row_name=index)

            # Pause for 10 seconds after every 50th row.
            # NOTE(review): presumably this throttles whatever lookup
            # ``process_lat_long`` performs -- confirm against the base class.
            if index > 0 and index % 50 == 0:
                sleep(10)

    def add_arguments(self, parser):
        """Register the ``-q``/``--quiet`` flag on the argument parser."""
        parser.add_argument(
            "-q", "--quiet", action="store_true", help="Silence progress bars."
        )

    def handle(self, quiet=False, *args, **options):
        """Run the import.

        Ensures the data set exists, deletes previously stored values for
        it, processes the CSV, then calls ``update_averages`` and
        ``update_max_min``.

        Args:
            quiet: When true, suppress the progress bar and status message.
        """
        self._quiet = quiet
        self.add_data_sets()
        self.delete_data()
        df = self.get_dataframe()
        self.process_data(df)
        self.update_averages()
        self.update_max_min()

    def delete_data(self):
        """Delete all AreaData rows belonging to this command's data types."""
        AreaData.objects.filter(data_type__in=self.data_types.values()).delete()