From eaacc3cda2aed46f39e5b2ec2f07ad13f6bf6a2e Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Mon, 30 Aug 2021 20:36:33 +0200 Subject: [PATCH] Add tests for low-level data acquisition from DWD --- .../provider/dwd/observation/test_download.py | 60 +++++++++++++++++++ .../provider/dwd/observation/download.py | 22 +++---- 2 files changed, 72 insertions(+), 10 deletions(-) create mode 100644 tests/provider/dwd/observation/test_download.py diff --git a/tests/provider/dwd/observation/test_download.py b/tests/provider/dwd/observation/test_download.py new file mode 100644 index 000000000..dccf68f83 --- /dev/null +++ b/tests/provider/dwd/observation/test_download.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2018-2021, earthobservations developers. +# Distributed under the MIT License. See LICENSE for more info. +import re +import zipfile + +import pytest + +from wetterdienst.exceptions import FailedDownload, ProductFileNotFound +from wetterdienst.provider.dwd.observation.download import ( + __download_climate_observations_data, +) + + +def test_download_climate_observations_data_failure_invalid_url(): + with pytest.raises(FailedDownload) as ex: + __download_climate_observations_data("foobar.txt") + assert ex.match(re.escape("Download failed for foobar.txt: InvalidURL(foobar.txt)")) + + +def test_download_climate_observations_data_failure_invalid_protocol(): + with pytest.raises(FailedDownload) as ex: + __download_climate_observations_data("foobar://bazqux.txt") + assert ex.match( + re.escape("Download failed for foobar://bazqux.txt: AssertionError()") + ) + + +def test_download_climate_observations_data_failure_invalid_zip(): + with pytest.raises(zipfile.BadZipFile) as ex: + __download_climate_observations_data("http://example.org") + assert ex.match( + re.escape("The Zip archive http://example.org seems to be corrupted") + ) + + +def test_download_climate_observations_data_failure_broken_zip(): + with pytest.raises(zipfile.BadZipFile) as ex: + __download_climate_observations_data( + "https://github.com/earthobservations/testdata/raw/main/opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/broken-zip-archive.zip" + ) + assert ex.match("The Zip archive .+ seems to be corrupted") + + +def test_download_climate_observations_data_failure_empty_zip(): + with pytest.raises(ProductFileNotFound) as ex: + __download_climate_observations_data( + "https://github.com/earthobservations/testdata/raw/main/opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/empty-zip-archive.zip" + ) + assert ex.match("The archive .+ does not contain a 'produkt\\*' file") + + +def test_download_climate_observations_data_valid(): + payload = __download_climate_observations_data( + "https://github.com/earthobservations/testdata/raw/main/opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/tageswerte_KL_00011_akt.zip" + ) + assert payload.startswith( + b"STATIONS_ID;MESS_DATUM;QN_3; FX; FM;QN_4; RSK;RSKF; SDK;SHK_TAG; NM; VPM;" + b" PM; TMK; UPM; TXK; TNK; TGK;eor\r\n 11;20200227; 10; 28.5; " + ) diff --git a/wetterdienst/provider/dwd/observation/download.py b/wetterdienst/provider/dwd/observation/download.py index 8b7c0a7cd..1f2445215 100644 --- a/wetterdienst/provider/dwd/observation/download.py +++ b/wetterdienst/provider/dwd/observation/download.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- # Copyright (c) 2018-2021, earthobservations developers. # Distributed under the MIT License. See LICENSE for more info. +import logging from concurrent.futures import ThreadPoolExecutor from io import BytesIO from typing import List, Tuple from zipfile import BadZipFile from fsspec.implementations.zip import ZipFileSystem -from requests.exceptions import InvalidURL from wetterdienst.exceptions import ( FailedDownload, @@ -17,6 +17,9 @@ from wetterdienst.util.cache import CacheExpiry from wetterdienst.util.network import download_file +log = logging.getLogger(__name__) + + PRODUCT_FILE_IDENTIFIER = "produkt" @@ -57,12 +60,11 @@ def __download_climate_observations_data(remote_file: str) -> bytes: try: file = download_file(remote_file, ttl=CacheExpiry.FIVE_MINUTES) - except InvalidURL as e: - raise InvalidURL( - f"Error: the station data {remote_file} could not be reached." - ) from e - except Exception: - raise FailedDownload(f"Download failed for {remote_file}") + except Exception as ex: + ex_type = ex.__class__.__name__ + msg = f"Download failed for {remote_file}: {ex_type}({ex})" + log.exception(msg) + raise FailedDownload(msg) try: zf = ZipFileSystem(file) @@ -73,15 +75,15 @@ def __download_climate_observations_data(remote_file: str) -> bytes: # Raise exceptions if no corresponding file was found or if there are multiple product files. if not product_files: raise ProductFileNotFound( - f"The archive {remote_file} does not contain a '{PRODUCT_FILE_IDENTIFIER}*' file." + f"The archive {remote_file} does not contain a '{PRODUCT_FILE_IDENTIFIER}*' file" ) elif len(product_files) > 1: raise MultipleProductFilesFound( - f"The archive {remote_file} contains multiple product files, which is ambiguous." + f"The archive {remote_file} contains multiple product files, which is ambiguous" ) file_in_bytes = zf.open(product_files[0]).read() return file_in_bytes except BadZipFile as e: - raise BadZipFile(f"The archive of {remote_file} seems to be corrupted.") from e + raise BadZipFile(f"The Zip archive {remote_file} seems to be corrupted") from e