Skip to content

Commit

Permalink
Merge pull request #113 from ImperialCollegeLondon/timezone
Browse files Browse the repository at this point in the history
Adds timezone support
  • Loading branch information
dalonsoa authored Jan 9, 2024
2 parents 60a30b6 + 4a27727 commit 8256832
Show file tree
Hide file tree
Showing 12 changed files with 162 additions and 80 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
needs: qa

runs-on: ubuntu-latest

services:
postgres:
image: timescale/timescaledb-ha:pg14-latest
Expand All @@ -30,10 +30,10 @@ jobs:

steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
- name: Set up Python 3.11
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.11
- name: Install dependencies
run: |
python -m pip install --upgrade pip wheel
Expand Down
6 changes: 5 additions & 1 deletion djangomain/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
"drf_yasg",
"management",
"crispy_forms",
"crispy_bootstrap4",
]

MIDDLEWARE = [
Expand Down Expand Up @@ -157,7 +158,7 @@
TIME_ZONE = "UTC"
USE_I18N = True
USE_L10N = True
USE_TZ = False
USE_TZ = True

# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/3.0/howto/static-files/
Expand Down Expand Up @@ -242,3 +243,6 @@
}

DEFAULT_AUTO_FIELD = "django.db.models.AutoField"

CRISPY_ALLOWED_TEMPLATE_PACKS = "bootstrap4"
CRISPY_TEMPLATE_PACK = "bootstrap4"
59 changes: 39 additions & 20 deletions importing/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import os
import shutil
import time
import zoneinfo
from datetime import datetime
from logging import getLogger
from numbers import Number
Expand Down Expand Up @@ -93,19 +94,21 @@ def get_last_uploaded_date(station_id, var_code):
return datetime


def preformat_matrix(source_file, file_format):
def preformat_matrix(source_file, file_format, timezone: str):
"""
First step for importing data. Works out what sort of file is being read and adds
standardised columns for date and datetime (str). This is used in construct_matrix.
Args:
source_file: path to raw data file.
file_format: formatting.models.Format object.
timezone: Timezone name, eg. 'America/Chicago'.
Returns:
Pandas.DataFrame with raw data read and extra column(s) for date and datetime
(Str), which should be parsed correctly here.
"""
firstline = file_format.first_row if file_format.first_row else 0
skipfooter = file_format.footer_rows if file_format.footer_rows else 0
tz = zoneinfo.ZoneInfo(timezone)

if file_format.extension.value in ["xlsx", "xlx"]:
# If in Excel format
Expand Down Expand Up @@ -152,7 +155,6 @@ def preformat_matrix(source_file, file_format):
skipfooter=skipfooter,
engine=engine,
encoding="ISO-8859-1",
error_bad_lines=False,
)
else:
file = pd.read_csv(
Expand All @@ -164,14 +166,13 @@ def preformat_matrix(source_file, file_format):
skipfooter=skipfooter,
engine=engine,
encoding="ISO-8859-1",
error_bad_lines=False,
)

datetime_format = file_format.date.code + " " + file_format.time.code
if file_format.date_column == file_format.time_column:
file["date"] = pd.Series(
[
standardise_datetime(row, datetime_format)
standardise_datetime(row, datetime_format).replace(tzinfo=tz)
for row in file[file_format.date_column - 1].values
],
index=file.index,
Expand All @@ -198,7 +199,7 @@ def preformat_matrix(source_file, file_format):
)
file["date"] = pd.Series(
[
standardise_datetime(row, datetime_format)
standardise_datetime(row, datetime_format).replace(tzinfo=tz)
for row in file["datetime_str"].values
],
index=file.index,
Expand All @@ -208,7 +209,7 @@ def preformat_matrix(source_file, file_format):
return file.reset_index(drop=True)


def standardise_datetime(date_time, datetime_format):
def standardise_datetime(date_time, datetime_format) -> datetime:
"""
Returns a datetime object in the case that date_time is not already in that form.
Args:
Expand All @@ -220,7 +221,9 @@ def standardise_datetime(date_time, datetime_format):
if isinstance(date_time, datetime):
return date_time
elif isinstance(date_time, np.datetime64):
date_time = datetime.utcfromtimestamp((date_time - unix_epoch) / one_second)
date_time = datetime.utcfromtimestamp(
float((date_time - unix_epoch) / one_second)
)
return date_time
elif isinstance(date_time, str):
pass
Expand Down Expand Up @@ -271,36 +274,52 @@ def save_temp_data_to_permanent(data_import_temp):
station_id=station.station_id,
).delete()

# The following is a hack to account for the different possible name of the
# fields that the models might have. Will be made "nicer" at some point.
# This should always work as a measurement model should always have one and only
# one of "value", "average", "sum" fields.
value_field = (
set([field.name for field in Model._meta.fields])
.intersection(["value", "average", "sum"])
.pop()
)

# Bulk add new data
# TODO improve this logic to cope with variables that might have max/min
# AND depth.
if "maximum" in table.columns:
model_instances = [
Model(
time=record["date"],
value=record["value"],
station_id=record["station_id"],
maximum=record["maximum"],
minimum=record["minimum"],
{
"time": record["date"],
value_field: record["value"],
"station_id": record["station_id"],
"maximum": record["maximum"],
"minimum": record["minimum"],
},
)
for record in records
]
elif "depth" in [f.name for f in Model._meta.fields]:
model_instances = [
Model(
time=record["date"],
value=record["value"],
depth=record["depth"],
station_id=record["station_id"],
{
"time": record["date"],
value_field: record["value"],
"depth": record["depth"],
"station_id": record["station_id"],
},
)
for record in records
]
else:
model_instances = [
Model(
time=record["date"],
value=record["value"],
station_id=record["station_id"],
{
"time": record["date"],
value_field: record["value"],
"station_id": record["station_id"],
},
)
for record in records
]
Expand All @@ -321,7 +340,7 @@ def construct_matrix(matrix_source, file_format, station):
"""

# Get the "preformatted matrix" sorted by date col
matrix = preformat_matrix(matrix_source, file_format)
matrix = preformat_matrix(matrix_source, file_format, station.timezone)
# Find start and end dates from top and bottom row
start_date = matrix.loc[0, "date"]
end_date = matrix.loc[matrix.shape[0] - 1, "date"]
Expand Down
6 changes: 5 additions & 1 deletion importing/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
import os
import shutil
import urllib
from logging import getLogger

import pandas as pd
from django.contrib.auth.decorators import permission_required
from django.http import HttpResponse, JsonResponse
from rest_framework import generics
Expand Down Expand Up @@ -58,7 +60,9 @@ class DataImportTempCreate(generics.CreateAPIView):

def perform_create(self, serializer):
file = copy.deepcopy(self.request.FILES["file"])
matrix = preformat_matrix(file, serializer.validated_data["format"])
timezone = serializer.validated_data["station"].timezone
getLogger().warning(timezone)
matrix = preformat_matrix(file, serializer.validated_data["format"], timezone)
del file
# Set start and end date based on cleaned data from the file
serializer.validated_data["start_date"] = matrix.loc[0, "date"]
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ psycopg2==2.9.9
pytz==2023.3.post1
PyYAML==6.0.1
uritemplate==4.1.1
crispy-bootstrap4==2023.1

## Legacy dependency versions
# asgiref==3.3.4
Expand Down
31 changes: 31 additions & 0 deletions station/migrations/0002_station_timezone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Generated by Django 4.2.7 on 2023-11-16 11:45

import datetime

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``timezone`` character column to the ``station`` model.

    The field is created with ``preserve_default=False``, so the ``default``
    given below is only a one-off value used to populate rows that already
    exist when the migration runs; it is not kept on the field afterwards.
    """

    dependencies = [
        ("station", "0001_initial"),
    ]

    operations = [
        migrations.AddField(
            model_name="station",
            name="timezone",
            field=models.CharField(
                # NOTE(review): these choices are the snapshot taken when the
                # migration was generated; they store the short label
                # ("London") rather than the zone key ("Europe/London") --
                # confirm they are superseded by a later migration.
                choices=[
                    ("London", "Europe/London"),
                    ("Paris", "Europe/Paris"),
                    ("New York", "America/New_York"),
                ],
                # One-off back-fill for pre-existing stations. It has to be a
                # timezone *name* (a string), not a datetime object: a
                # timestamp stored in this column is not a usable zone key.
                # "UTC" matches the project-wide TIME_ZONE setting.
                default="UTC",
                max_length=100,
                verbose_name="Timezone",
            ),
            preserve_default=False,
        ),
    ]
5 changes: 5 additions & 0 deletions station/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,14 @@
# creadoras, ya sea en uso total o parcial del código.
########################################################################################

import zoneinfo

from django.core.validators import MaxValueValidator, MinValueValidator
from django.db import models
from django.urls import reverse

TIMEZONES = tuple([(val, val) for val in sorted(zoneinfo.available_timezones())])

# Global variables used in Basin model
BASIN_IMAGE_PATH = "station/basin_image/"
BASIN_FILE_PATH = "station/basin_file/"
Expand Down Expand Up @@ -265,6 +269,7 @@ class Station(models.Model):
influence_km = models.DecimalField(
"Área of input (km)", max_digits=12, decimal_places=4, null=True, blank=True
)
timezone = models.CharField("Timezone", max_length=100, choices=TIMEZONES)

def __str__(self):
return str(self.station_code)
Expand Down
16 changes: 10 additions & 6 deletions tests/importing/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,19 @@ class TestMatrixFunctions(TestCase):

def setUp(self):
from formatting.models import Format
from station.models import Station
from station.models import TIMEZONES, Station

self.file_format = Format.objects.get(format_id=45)
self.data_file = str(
Path(__file__).parent.parent / "test_data/iMHEA_HMT_01_HI_01_raw.csv"
)
self.station = Station.objects.get(station_id=8)
self.station.timezone = TIMEZONES[0][0]

def test_preformat_matrix(self):
from importing.functions import preformat_matrix

df = preformat_matrix(self.data_file, self.file_format)
df = preformat_matrix(self.data_file, self.file_format, self.station.timezone)
self.assertEqual(df.shape, (263371, 5))

def test_construct_matrix(self):
Expand Down Expand Up @@ -85,15 +86,18 @@ def setUp(self):
from importing.functions import preformat_matrix
from importing.models import DataImportTemp
from measurement.models import Flow
from station.models import Station
from station.models import TIMEZONES, Station

self.file_format = Format.objects.get(format_id=45)
self.data_file = str(
Path(__file__).parent.parent / "test_data" / "iMHEA_HMT_01_HI_01_raw.csv"
)
self.station = Station.objects.get(station_id=8)
self.station.timezone = TIMEZONES[0][0]

matrix = preformat_matrix(self.data_file, self.file_format)
matrix = preformat_matrix(
self.data_file, self.file_format, self.station.timezone
)
start_date = matrix.loc[0, "date"]
end_date = matrix.loc[matrix.shape[0] - 1, "date"]

Expand All @@ -114,12 +118,12 @@ def setUp(self):
flow1 = Flow.objects.create(
station_id=8,
time=datetime(2014, 6, 28, 0, 35, 0, tzinfo=pytz.UTC),
value=3.4,
average=3.4,
)
flow2 = Flow.objects.create(
station_id=8,
time=datetime(2016, 3, 7, 18, 5, 0, tzinfo=pytz.UTC),
value=5.7,
average=5.7,
)

def test_get_last_uploaded_date(self):
Expand Down
7 changes: 5 additions & 2 deletions tests/importing/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,24 @@ class TestSaveImportModels(TestCase):

def setUp(self):
from formatting.models import Format
from station.models import Station
from station.models import TIMEZONES, Station

self.file_format = Format.objects.get(format_id=45)
self.data_file = str(
Path(__file__).parent.parent / "test_data/iMHEA_HMT_01_HI_01_raw.csv"
)
self.station = Station.objects.get(station_id=8)
self.station.timezone = TIMEZONES[0][0]

def test_save_import_temp(self):
from django.core.files.uploadedfile import SimpleUploadedFile

from importing.functions import preformat_matrix
from importing.models import DataImportTemp

matrix = preformat_matrix(self.data_file, self.file_format)
matrix = preformat_matrix(
self.data_file, self.file_format, self.station.timezone
)
start_date = matrix.loc[0, "date"]
end_date = matrix.loc[matrix.shape[0] - 1, "date"]

Expand Down
8 changes: 4 additions & 4 deletions tests/measurement/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,22 @@ def setUp(self):
flow1 = Flow.objects.create(
station_id=1,
time=datetime(2015, 10, 9, 23, 55, 59, tzinfo=pytz.UTC),
value=10.2,
average=10.2,
)
flow2 = Flow.objects.create(
station_id=1,
time=datetime(2016, 11, 9, 23, 55, 59, tzinfo=pytz.UTC),
value=5.7,
average=5.7,
)
precip1 = Precipitation.objects.create(
station_id=2,
time=datetime(2017, 12, 9, 23, 55, 59, tzinfo=pytz.UTC),
value=11.1,
sum=11.1,
)
precip2 = Precipitation.objects.create(
station_id=2,
time=datetime(2018, 1, 9, 23, 55, 59, tzinfo=pytz.UTC),
value=0.3,
sum=0.3,
)

def test_flow(self):
Expand Down
Loading

0 comments on commit 8256832

Please sign in to comment.