Skip to content

Commit

Permalink
DATASET: RSPB nature reserves
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-griffen committed Dec 21, 2023
1 parent 99e7a05 commit e6261c8
Showing 1 changed file with 100 additions and 0 deletions.
100 changes: 100 additions & 0 deletions hub/management/commands/import_rspb_nature_reserves.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from time import sleep

from django.conf import settings

import pandas as pd
from tqdm import tqdm

from hub.models import Area, AreaData, DataSet, DataType

from .base_importers import BaseLatLongImportCommand


class Command(BaseLatLongImportCommand):
    """Management command that imports RSPB reserve locations.

    Reads reserve centroid coordinates from ``data_file`` and passes each one
    to ``process_lat_long`` (inherited; not defined in this file), after
    registering the ``rspb_nature_reserves`` data set and data type.
    """

    help = "Import number of RSPB reserves by constituency"

    data_file = settings.BASE_DIR / "data" / "rspb_reserves_centroids.csv"
    message = "Importing RSPB reserves data"
    uses_gss = False

    # Throttling: sleep for ``pause_seconds`` after every ``rows_per_pause``
    # rows. NOTE(review): presumably this keeps the lookup performed by
    # ``process_lat_long`` under a rate limit -- confirm against the base class.
    rows_per_pause = 50
    pause_seconds = 10

    defaults = {}

    data_sets = {
        "": {
            "defaults": defaults,
        },
    }

    def add_data_sets(self):
        """Create or update the DataSet/DataType rows this command writes to.

        Stores the resulting data type on ``self.data_type`` and registers it
        in ``self.data_types`` under its name.
        """
        data_set, _ = DataSet.objects.update_or_create(
            name="rspb_nature_reserves",
            defaults={
                "label": "RSPB Reserves",
                "data_type": "integer",
                "category": "place",
                "description": "Number of RSPB reserves by constituency.",
                "source_label": "Data from the RSPB.",
                "release_date": "September 2023",
                "source": "https://www.rspb.org.uk",
                "source_type": "csv",
                "table": "areadata",
                "default_value": {},
                "data_url": "",
                "is_filterable": False,
                "comparators": DataSet.comparators_default,
            },
        )

        data_type, _ = DataType.objects.update_or_create(
            data_set=data_set,
            name="rspb_nature_reserves",
            defaults={
                "data_type": "integer",
                "label": "RSPB Reserves",
                "description": "Number of RSPB reserves by constituency.",
            },
        )

        self.data_type = data_type
        self.data_types[data_type.name] = data_type

    def get_dataframe(self):
        """Load the reserve coordinates from ``data_file``.

        Returns:
            A DataFrame with exactly the columns ``Longitude`` (from the CSV's
            ``xcoord``) and ``Latitude`` (from ``ycoord``), in that order.
        """
        df = pd.read_csv(
            self.data_file,
            usecols=["xcoord", "ycoord"],
        )
        # ``usecols`` keeps the file's own column order, not the order of the
        # list above, so relabel by name rather than by position; otherwise a
        # CSV with ``ycoord`` first would silently swap latitude and longitude.
        df = df.rename(columns={"xcoord": "Longitude", "ycoord": "Latitude"})
        return df[["Longitude", "Latitude"]]

    def process_data(self, df):
        """Feed every row of *df* to ``process_lat_long``.

        Args:
            df: DataFrame with ``Latitude`` and ``Longitude`` columns, as
                produced by ``get_dataframe``. If ``None``, the CSV is read.
        """
        # Use the frame the caller already loaded instead of re-reading the
        # CSV; only fall back to a fresh read when nothing was supplied.
        if df is None:
            df = self.get_dataframe()

        if not self._quiet:
            self.stdout.write("Importing RSPB reserve count data")

        rows = tqdm(df.iterrows(), total=len(df), disable=self._quiet)
        for index, row in rows:
            lat = row["Latitude"]
            lon = row["Longitude"]

            self.process_lat_long(lat=lat, lon=lon, row_name=index)

            # Pause after each full batch (never before the first row).
            if index > 0 and index % self.rows_per_pause == 0:
                sleep(self.pause_seconds)

    def add_arguments(self, parser):
        """Register the ``-q/--quiet`` flag on the command's argument parser."""
        parser.add_argument(
            "-q", "--quiet", action="store_true", help="Silence progress bars."
        )

    def handle(self, quiet=False, *args, **options):
        """Run the import.

        Registers the data set, deletes existing values for it, processes the
        CSV, then calls the inherited ``update_averages`` and
        ``update_max_min`` helpers.

        Args:
            quiet: When true, suppress the progress bar and status message.
        """
        self._quiet = quiet
        self.add_data_sets()
        self.delete_data()
        df = self.get_dataframe()
        self.process_data(df)
        self.update_averages()
        self.update_max_min()

    def delete_data(self):
        """Delete all AreaData rows belonging to this command's data types."""
        AreaData.objects.filter(data_type__in=self.data_types.values()).delete()

0 comments on commit e6261c8

Please sign in to comment.