Skip to content

Commit

Permalink
Add tests for low-level data acquisition from DWD
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Aug 30, 2021
1 parent 479622e commit eaacc3c
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 10 deletions.
60 changes: 60 additions & 0 deletions tests/provider/dwd/observation/test_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2018-2021, earthobservations developers.
# Distributed under the MIT License. See LICENSE for more info.
import re
import zipfile

import pytest

from wetterdienst.exceptions import FailedDownload, ProductFileNotFound
from wetterdienst.provider.dwd.observation.download import (
__download_climate_observations_data,
)


def test_download_climate_observations_data_failure_invalid_url():
    """Expect ``FailedDownload`` when the location ``foobar.txt`` is requested.

    The exception message must name the requested location together with
    the ``InvalidURL`` cause.
    """
    expected_message = re.escape(
        "Download failed for foobar.txt: InvalidURL(foobar.txt)"
    )

    with pytest.raises(FailedDownload) as ex:
        __download_climate_observations_data("foobar.txt")

    # Checked after the context manager exits, once the exception info is populated.
    assert ex.match(expected_message)


def test_download_climate_observations_data_failure_invalid_protocol():
    """Expect ``FailedDownload`` when ``foobar://bazqux.txt`` is requested.

    The exception message must name the requested location together with
    the ``AssertionError`` cause.
    """
    expected_message = re.escape(
        "Download failed for foobar://bazqux.txt: AssertionError()"
    )

    with pytest.raises(FailedDownload) as ex:
        __download_climate_observations_data("foobar://bazqux.txt")

    # Checked after the context manager exits, once the exception info is populated.
    assert ex.match(expected_message)


def test_download_climate_observations_data_failure_invalid_zip():
    """Expect ``zipfile.BadZipFile`` when ``http://example.org`` is requested.

    The exception message must report the Zip archive as corrupted.
    """
    expected_message = re.escape(
        "The Zip archive http://example.org seems to be corrupted"
    )

    with pytest.raises(zipfile.BadZipFile) as ex:
        __download_climate_observations_data("http://example.org")

    # Checked after the context manager exits, once the exception info is populated.
    assert ex.match(expected_message)


def test_download_climate_observations_data_failure_broken_zip():
    """Expect ``zipfile.BadZipFile`` for the ``broken-zip-archive.zip`` test file.

    The exception message must report the Zip archive as corrupted.
    """
    broken_archive_url = "https://github.com/earthobservations/testdata/raw/main/opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/broken-zip-archive.zip"
    # Regular expression on purpose: ``.+`` stands in for the archive location.
    expected_pattern = "The Zip archive .+ seems to be corrupted"

    with pytest.raises(zipfile.BadZipFile) as ex:
        __download_climate_observations_data(broken_archive_url)

    assert ex.match(expected_pattern)


def test_download_climate_observations_data_failure_empty_zip():
    """Expect ``ProductFileNotFound`` for the ``empty-zip-archive.zip`` test file.

    The exception message must state that no ``produkt*`` file is contained.
    """
    empty_archive_url = "https://github.com/earthobservations/testdata/raw/main/opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/empty-zip-archive.zip"
    # Regular expression on purpose: ``.+`` stands in for the archive location,
    # and the asterisk is escaped so it is matched literally.
    expected_pattern = "The archive .+ does not contain a 'produkt\\*' file"

    with pytest.raises(ProductFileNotFound) as ex:
        __download_climate_observations_data(empty_archive_url)

    assert ex.match(expected_pattern)


def test_download_climate_observations_data_valid():
    """Check that a valid archive yields the product file content as bytes.

    The returned payload must begin with the expected header line and the
    start of the first data record.
    """
    archive_url = "https://github.com/earthobservations/testdata/raw/main/opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/tageswerte_KL_00011_akt.zip"
    expected_prefix = (
        b"STATIONS_ID;MESS_DATUM;QN_3; FX; FM;QN_4; RSK;RSKF; SDK;SHK_TAG; NM; VPM;"
        b" PM; TMK; UPM; TXK; TNK; TGK;eor\r\n 11;20200227; 10; 28.5; "
    )

    payload = __download_climate_observations_data(archive_url)

    assert payload.startswith(expected_prefix)
22 changes: 12 additions & 10 deletions wetterdienst/provider/dwd/observation/download.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2018-2021, earthobservations developers.
# Distributed under the MIT License. See LICENSE for more info.
import logging
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO
from typing import List, Tuple
from zipfile import BadZipFile

from fsspec.implementations.zip import ZipFileSystem
from requests.exceptions import InvalidURL

from wetterdienst.exceptions import (
FailedDownload,
Expand All @@ -17,6 +17,9 @@
from wetterdienst.util.cache import CacheExpiry
from wetterdienst.util.network import download_file

log = logging.getLogger(__name__)


PRODUCT_FILE_IDENTIFIER = "produkt"


Expand Down Expand Up @@ -57,12 +60,11 @@ def __download_climate_observations_data(remote_file: str) -> bytes:

try:
file = download_file(remote_file, ttl=CacheExpiry.FIVE_MINUTES)
except InvalidURL as e:
raise InvalidURL(
f"Error: the station data {remote_file} could not be reached."
) from e
except Exception:
raise FailedDownload(f"Download failed for {remote_file}")
except Exception as ex:
ex_type = ex.__class__.__name__
msg = f"Download failed for {remote_file}: {ex_type}({ex})"
log.exception(msg)
raise FailedDownload(msg)

try:
zf = ZipFileSystem(file)
Expand All @@ -73,15 +75,15 @@ def __download_climate_observations_data(remote_file: str) -> bytes:
# Raise exceptions if no corresponding file was found or if there are multiple product files.
if not product_files:
raise ProductFileNotFound(
f"The archive {remote_file} does not contain a '{PRODUCT_FILE_IDENTIFIER}*' file."
f"The archive {remote_file} does not contain a '{PRODUCT_FILE_IDENTIFIER}*' file"
)
elif len(product_files) > 1:
raise MultipleProductFilesFound(
f"The archive {remote_file} contains multiple product files, which is ambiguous."
f"The archive {remote_file} contains multiple product files, which is ambiguous"
)

file_in_bytes = zf.open(product_files[0]).read()
return file_in_bytes

except BadZipFile as e:
raise BadZipFile(f"The archive of {remote_file} seems to be corrupted.") from e
raise BadZipFile(f"The Zip archive {remote_file} seems to be corrupted") from e

0 comments on commit eaacc3c

Please sign in to comment.