Skip to content

Commit

Permalink
Merge pull request #136 from ImperialCollegeLondon/develop
Browse files Browse the repository at this point in the history
Allow column selection in DSR GET
  • Loading branch information
AdrianDAlessandro authored Sep 27, 2023
2 parents 28672e0 + d23211d commit abb6f98
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 19 deletions.
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,20 @@ repos:
- id: check-merge-conflict
- id: debug-statements
- repo: https://github.com/psf/black
rev: "23.7.0"
rev: "23.9.1"
hooks:
- id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: 'v0.0.282'
rev: 'v0.0.291'
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v1.4.1"
rev: "v1.5.1"
hooks:
- id: mypy
- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.35.0
rev: v0.37.0
hooks:
- id: markdownlint
args: ["--disable", "MD013", "--"]
6 changes: 6 additions & 0 deletions datahub/dsr.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ class Config:
allow_population_by_field_name = True


# Lookup table from each field's human-readable title (as it appears in the
# DSR data frames) to its snake_case field name, built once from the pydantic
# model schema so API query parameters can be validated against it.
dsr_headers = {
    props["title"]: field_name
    for field_name, props in DSRModel.schema(by_alias=False)["properties"].items()
}


def validate_dsr_data(data: dict[str, NDArray | str]) -> None:
"""Validate the shapes of the arrays in the DSR data.
Expand Down
46 changes: 36 additions & 10 deletions datahub/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from . import data as dt
from . import log
from .dsr import read_dsr_file, validate_dsr_data
from .dsr import dsr_headers, read_dsr_file, validate_dsr_data
from .opal import OpalArrayData, OpalModel
from .wesim import get_wesim

Expand Down Expand Up @@ -155,42 +155,68 @@ def upload_dsr(file: UploadFile) -> dict[str, str | None]:


@app.get("/dsr", response_class=ORJSONResponse)
def get_dsr_data(
    start: int = -1, end: int | None = None, col: str | None = None
) -> ORJSONResponse:
    """GET method function for getting DSR data as JSON.

    It takes optional query parameters of:

    - `start`: Starting index for exported list. Defaults to -1 for the most recent
      entry only.
    - `end`: Last index that will be included in exported list.
    - `col`: A comma-separated list of which columns/keys within the data to get.
      These values are all lower-case and spaces are replaced by underscores.

    And returns a dictionary containing the DSR data in JSON format.

    This can be converted back to a DataFrame using the following:
    `pd.DataFrame(**data)`

    TODO: Ensure data is json serializable or returned in binary format

    \f

    Args:
        start: Starting index for exported list
        end: Last index that will be included in exported list
        col: Column names to filter by, multiple values separated by comma

    Returns:
        A Dict containing the DSR list

    Raises:
        HTTPException: 400 if `end` is less than `start`, or if any entry in
            `col` is not a known column name.
    """  # noqa: D301
    log.info("Sending DSR data...")
    log.debug(f"Query parameters:\n\nstart={start}\nend={end}\ncol={col}\n")
    if isinstance(end, int) and end < start:
        message = "End parameter cannot be less than Start parameter."
        log.error(message)
        raise HTTPException(status_code=400, detail=message)

    log.info("Filtering data by index...")
    log.debug(f"Current DSR data length:\n\n{len(dt.dsr_data)}")
    # BUG FIX: `end + 1 if end else end` treated end=0 as "no end" (falsy) and
    # returned an empty slice; compare against None so index 0 can be the last
    # included entry. Slicing already produces a new list, so the previous
    # shallow `.copy()` before slicing was redundant and has been dropped.
    filtered_index_data = dt.dsr_data[start : end + 1 if end is not None else None]
    # BUG FIX: previously logged len(dt.dsr_data), so the "filtered" length
    # never reflected the index filtering above.
    log.debug(f"Filtered DSR data length:\n\n{len(filtered_index_data)}")

    if isinstance(col, str):
        log.debug(f"Columns:\n\n{col.split(',')}\n")
        columns = col.lower().split(",")

        # Reject the whole request if any requested column name is unknown.
        for col_name in columns:
            if col_name not in dsr_headers.values():
                message = "One or more of the specified columns are invalid."
                log.error(message)
                raise HTTPException(status_code=400, detail=message)

        log.info("Filtering data by column...")
        filtered_data = []
        for frame in filtered_index_data:
            # Frame keys are human-readable titles; map each back to its
            # snake_case name via dsr_headers and keep only requested columns.
            # NOTE(review): assumes every frame key's .title() form exists in
            # dsr_headers — a stray key would raise KeyError (500); confirm
            # upstream validation guarantees this.
            filtered_keys = {
                key: value
                for key, value in frame.items()
                if dsr_headers[key.title()] in columns
            }
            filtered_data.append(filtered_keys)

        return ORJSONResponse({"data": filtered_data})

    return ORJSONResponse({"data": filtered_index_data})


@app.get("/wesim")
Expand Down
8 changes: 4 additions & 4 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
#
# pip-compile --extra=dev --output-file=requirements-dev.txt
#
anyio==3.7.0
anyio==4.0.0
# via
# httpcore
# starlette
attrs==23.1.0
# via pytest-mypy
black==23.3.0
# via datahub (pyproject.toml)
build==0.10.0
build==1.0.0
# via pip-tools
certifi==2023.7.22
# via
Expand Down Expand Up @@ -49,7 +49,7 @@ h11==0.14.0
# uvicorn
h5py==3.9.0
# via datahub (pyproject.toml)
httpcore==0.17.2
httpcore==0.17.3
# via httpx
httpx==0.24.1
# via datahub (pyproject.toml)
Expand Down Expand Up @@ -129,7 +129,7 @@ pyxlsb==1.0.10
# via pandas
pyyaml==6.0
# via pre-commit
ruff==0.0.282
ruff==0.0.287
# via datahub (pyproject.toml)
six==1.16.0
# via python-dateutil
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
# pip-compile
#
anyio==3.7.0
anyio==4.0.0
# via starlette
click==8.1.6
# via uvicorn
Expand Down
31 changes: 31 additions & 0 deletions tests/test_dsr_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,37 @@ def test_get_dsr_api(dsr_data):
new_data = dsr_data.copy()
new_data["Name"] = "A new entry"
dt.dsr_data.append(new_data)
new_data = dsr_data.copy()
new_data["Name"] = "Another new entry"
dt.dsr_data.append(new_data)

response = client.get("/dsr")
assert response.json()["data"][0]["Name"] == dt.dsr_data[2]["Name"]

# Checks index filtering
response = client.get("/dsr?start=1")
assert len(response.json()["data"]) == 2
assert response.json()["data"][0]["Name"] == dt.dsr_data[1]["Name"]
assert response.json()["data"][1]["Name"] == dt.dsr_data[2]["Name"]

response = client.get("/dsr?start=0&end=1")
assert len(response.json()["data"]) == 2
assert response.json()["data"][0]["Name"] == dt.dsr_data[0]["Name"]
assert response.json()["data"][1]["Name"] == dt.dsr_data[1]["Name"]

# Checks column filtering
response = client.get("/dsr?col=activities")
assert len(response.json()["data"][0].keys()) == 1
assert "Activities" in response.json()["data"][0].keys()

response = client.get("/dsr?col=activity_types,kwh_cost")
assert len(response.json()["data"][0].keys()) == 2
assert "Activity Types" in response.json()["data"][0].keys()
assert "kWh Cost" in response.json()["data"][0].keys()

response = client.get("/dsr?start=1&col=activities")
assert len(response.json()["data"]) == 2
assert len(response.json()["data"][0].keys()) == 1
assert "Activities" in response.json()["data"][0].keys()
assert len(response.json()["data"][1].keys()) == 1
assert "Activities" in response.json()["data"][1].keys()

0 comments on commit abb6f98

Please sign in to comment.