Large refactoring (#188)
The idea behind this is to make the code base more approachable and to make
room for data from providers other than DWD.
amotl authored Sep 26, 2020
1 parent 5b6ea70 commit 5c8f589
Showing 92 changed files with 1,625 additions and 1,648 deletions.
2 changes: 1 addition & 1 deletion .github/release/standard.test.yml
@@ -1,4 +1,4 @@
sut:
build: ../..
dockerfile: .github/release/standard/Dockerfile
command: wetterdienst about parameters
command: wetterdienst dwd about parameters
2 changes: 1 addition & 1 deletion CHANGELOG.rst
@@ -5,7 +5,7 @@ Changelog
Development
===========

...
- Large refactoring

0.8.0 (25.09.2020)
==================
5 changes: 1 addition & 4 deletions docs/pages/api.rst
@@ -72,9 +72,6 @@ The function returns a Pandas DataFrame with information about the available sta
The column ``HAS_FILE`` indicates whether the station actually has a file with data on
the server. That might not always be the case for stations which have been phased out.

When using ``create_new_file_index=True``, the function can be forced to retrieve
a new list of files from the server. Otherwise, data will be served from the
cache because this information rarely changes.

Measurements
============
@@ -116,7 +113,7 @@ Inquire the list of stations by geographic coordinates.
.. code-block:: python
from datetime import datetime
from wetterdienst import get_nearby_stations_by_number, DWDStationRequest
from wetterdienst import DWDStationRequest, get_nearby_stations_by_number
from wetterdienst import Parameter, PeriodType, TimeResolution
stations = get_nearby_stations_by_number(
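
For illustration, a minimal sketch of how the station metadata described in the api.rst snippet above might be queried and filtered on ``HAS_FILE``. It assumes ``metadata_for_climate_observations`` (imported in the notebook example further down) accepts the parameter/resolution/period triple used throughout this diff; the exact keyword names are an assumption.

.. code-block:: python

    from wetterdienst import metadata_for_climate_observations
    from wetterdienst import Parameter, PeriodType, TimeResolution

    # Assumed signature: the parameter/resolution/period triple used elsewhere
    # in this diff; the real keyword names may differ.
    metadata = metadata_for_climate_observations(
        parameter=Parameter.CLIMATE_SUMMARY,
        time_resolution=TimeResolution.DAILY,
        period_type=PeriodType.HISTORICAL,
    )

    # Stations that have been phased out may have no data file on the server.
    stations_with_data = metadata[metadata["HAS_FILE"]]
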
1 change: 0 additions & 1 deletion docs/pages/behind_the_scenes.rst
@@ -13,7 +13,6 @@ Behind the scenes
- ``create_file_list_for_climate_observations``
- is used with the help of the metadata to retrieve file paths to
files for a set of parameters + station id
- here also **create_new_file_index** can be used

- ``download_climate_observations_data_parallel``
- is used with the created file paths to download and store the data
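
A sketch of the pipeline described in the bullet list above, grounded in the calls that appear later in this diff (the file-index test below issues the same call); the download step is only indicated in a comment because its module path is not shown here.

.. code-block:: python

    from wetterdienst import Parameter, PeriodType, TimeResolution
    from wetterdienst.dwd.observations.fileindex import (
        create_file_list_for_climate_observations,
    )

    # Step 1: resolve remote file paths for a station/parameter combination.
    remote_files = create_file_list_for_climate_observations(
        station_ids=[1048],
        parameter=Parameter.CLIMATE_SUMMARY,
        time_resolution=TimeResolution.DAILY,
        period_type=PeriodType.RECENT,
    )
    # e.g. [".../daily/kl/recent/tageswerte_KL_01048_akt.zip"]

    # Step 2 (not shown here): pass the file list to
    # download_climate_observations_data_parallel to download and store the data.
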
197 changes: 102 additions & 95 deletions docs/pages/cli.rst

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions example/mosmix.py
@@ -10,8 +10,8 @@
Other MOSMIX variants are also listed and can be
enabled on demand.
"""
from wetterdienst.additionals.util import setup_logging
from wetterdienst.mosmix import MOSMIXReader
from wetterdienst.util.cli import setup_logging
from wetterdienst.dwd.mosmix import MOSMIXReader


def mosmix_example():
3 changes: 2 additions & 1 deletion example/simple_example.ipynb
@@ -35,9 +35,10 @@
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"from wetterdienst import metadata_for_climate_observations, \\\n",
" collect_climate_observations_data, get_nearby_stations_by_number, \\\n",
" get_nearby_stations_by_number, \\\n",
" discover_climate_observations\n",
"from wetterdienst import PeriodType, TimeResolution, Parameter\n",
"from wetterdienst.dwd.observations.access import collect_climate_observations_data\n",
"\n",
"%matplotlib inline\n",
"import matplotlib as mpl\n",
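
A minimal sketch of the new import path for ``collect_climate_observations_data`` used in the notebook above; the keyword arguments mirror the test calls further down in this diff, and anything beyond those names is an assumption.

.. code-block:: python

    from wetterdienst import Parameter, PeriodType, TimeResolution
    from wetterdienst.dwd.observations.access import (
        collect_climate_observations_data,
    )

    # Arguments mirror the calls in the observation tests below.
    df = collect_climate_observations_data(
        station_ids=[1048],
        parameter=Parameter.CLIMATE_SUMMARY,
        time_resolution=TimeResolution.DAILY,
        period_type=PeriodType.RECENT,
        prefer_local=False,
        write_file=False,
        tidy_data=False,
    )
    print(df.head())
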
2 changes: 1 addition & 1 deletion example/sql.py
@@ -16,7 +16,7 @@
import logging

from wetterdienst import DWDStationRequest
from wetterdienst import Parameter, PeriodType, TimeResolution
from wetterdienst import TimeResolution, Parameter, PeriodType

log = logging.getLogger()

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -1,67 +1,53 @@
""" Tests for data_collection """
from datetime import datetime

import mock
import pytest
from mock import patch
from mock import MagicMock, patch
from pathlib import Path
import pandas as pd
from io import StringIO, BytesIO
from shutil import rmtree

from wetterdienst.enumerations.column_names_enumeration import DWDMetaColumns
from wetterdienst.enumerations.parameter_enumeration import Parameter
from wetterdienst.enumerations.time_resolution_enumeration import TimeResolution
from wetterdienst.enumerations.period_type_enumeration import PeriodType
from wetterdienst.data_collection import (
from wetterdienst.dwd.observations.access import (
collect_climate_observations_data,
_tidy_up_data,
collect_radolan_data,
)
from wetterdienst.dwd.metadata.parameter import Parameter
from wetterdienst import TimeResolution
from wetterdienst.dwd.metadata.period_type import PeriodType


TESTS_DIR = Path(__file__).parent

FIXTURES_DIR = TESTS_DIR / "fixtures"

TEMPORARY_DATA_DIR = TESTS_DIR / "dwd_data"
TEMPORARY_DATA_DIR.mkdir(parents=True, exist_ok=True)
HERE = Path(__file__).parent

# Set filename for mock
filename = "tageswerte_KL_00001_19370101_19860630_hist.zip"

# Loading test data
TEST_FILE = pd.read_json(FIXTURES_DIR / "FIXED_STATIONDATA.JSON")
TEST_FILE = pd.read_json(HERE / "FIXED_STATIONDATA.JSON")

# Prepare csv for regular "downloading" test
CSV_FILE = StringIO()
TEST_FILE.to_csv(CSV_FILE, sep=";")
CSV_FILE.seek(0)


@pytest.mark.xfail
@patch(
"wetterdienst.data_collection.create_file_list_for_climate_observations",
mock.MagicMock(
return_value=pd.DataFrame({DWDMetaColumns.FILENAME.value: [filename]})
),
"wetterdienst.dwd.observations.fileindex.create_file_list_for_climate_observations",
MagicMock(return_value=[filename]),
)
@patch(
"wetterdienst.data_collection.download_climate_observations_data_parallel",
mock.MagicMock(return_value=[(filename, BytesIO(CSV_FILE.read().encode()))]),
"wetterdienst.dwd.observations.access.download_climate_observations_data_parallel",
MagicMock(return_value=[(filename, BytesIO(CSV_FILE.read().encode()))]),
)
def test_collect_dwd_data():
def test_collect_dwd_data_success():
""" Test for data collection """
"""
1. Scenario
This scenario makes sure we take fresh data and write it to the given folder, thus
we can run just another test afterwards as no old data is used
"""
collect_climate_observations_data(
assert collect_climate_observations_data(
station_ids=[1],
parameter=Parameter.CLIMATE_SUMMARY,
time_resolution=TimeResolution.DAILY,
period_type=PeriodType.HISTORICAL,
folder=TEMPORARY_DATA_DIR,
prefer_local=False,
write_file=True,
tidy_data=False,
@@ -72,31 +58,29 @@ def test_collect_dwd_data():
This scenario tries to get the data from the given folder. This data was placed by
the first test and is now restored
"""
collect_climate_observations_data(
assert collect_climate_observations_data(
station_ids=[1],
parameter=Parameter.CLIMATE_SUMMARY,
time_resolution=TimeResolution.DAILY,
period_type=PeriodType.HISTORICAL,
folder=TEMPORARY_DATA_DIR,
prefer_local=True,
write_file=True,
tidy_data=False,
).equals(TEST_FILE)

# Remove storage folder
rmtree(TEMPORARY_DATA_DIR)

# Have to place an assert afterwards to ensure that above function is executed
assert True
# Have to place an assert afterwards to ensure that above function is executed.
# WTF!!!
# assert True


@pytest.mark.xfail
@patch(
"wetterdienst.data_collection.restore_climate_observations",
mock.MagicMock(return_value=pd.DataFrame()),
"wetterdienst.dwd.observations.store.restore_climate_observations",
MagicMock(return_value=pd.DataFrame()),
)
@patch(
"wetterdienst.data_collection.create_file_list_for_climate_observations",
mock.MagicMock(return_value=pd.DataFrame(columns=[DWDMetaColumns.FILENAME.value])),
"wetterdienst.dwd.observations.fileindex.create_file_list_for_climate_observations",
MagicMock(return_value=[]),
)
def test_collect_dwd_data_empty():
""" Test for data collection with no available data """
@@ -105,12 +89,12 @@ def test_collect_dwd_data_empty():
1. Scenario
Test for request where no data is available
"""

assert collect_climate_observations_data(
station_ids=[1],
station_ids=[1048],
parameter=Parameter.CLIMATE_SUMMARY,
time_resolution=TimeResolution.DAILY,
period_type=PeriodType.HISTORICAL,
folder="",
period_type=PeriodType.RECENT,
prefer_local=True,
write_file=False,
tidy_data=False,
@@ -239,26 +223,3 @@ def test_tidy_up_data():
)

assert _tidy_up_data(df, Parameter.CLIMATE_SUMMARY).equals(df_tidy)


@pytest.mark.remote
def test_collect_radolan_data():
with Path(FIXTURES_DIR, "radolan_hourly_201908080050").open("rb") as f:
radolan_hourly = BytesIO(f.read())

radolan_hourly_test = collect_radolan_data(
date_times=[datetime(year=2019, month=8, day=8, hour=0, minute=50)],
time_resolution=TimeResolution.HOURLY,
)[0][1]

assert radolan_hourly.getvalue() == radolan_hourly_test.getvalue()

with Path(FIXTURES_DIR, "radolan_daily_201908080050").open("rb") as f:
radolan_daily = BytesIO(f.read())

radolan_daily_test = collect_radolan_data(
date_times=[datetime(year=2019, month=8, day=8, hour=0, minute=50)],
time_resolution=TimeResolution.DAILY,
)[0][1]

assert radolan_daily.getvalue() == radolan_daily_test.getvalue()
52 changes: 21 additions & 31 deletions tests/test_api.py → tests/dwd/observations/test_api.py
@@ -1,18 +1,16 @@
from datetime import datetime
from io import BytesIO
from pathlib import Path
import json

import pytest
import pandas as pd

from wetterdienst.api import DWDStationRequest, DWDRadolanRequest
from wetterdienst import (
discover_climate_observations,
TimeResolution,
Parameter,
PeriodType,
)
from wetterdienst.dwd.observations.api import DWDStationRequest
from wetterdienst.exceptions import StartDateEndDateError
from wetterdienst.enumerations.parameter_enumeration import Parameter
from wetterdienst.enumerations.period_type_enumeration import PeriodType
from wetterdienst.enumerations.time_resolution_enumeration import TimeResolution


FIXTURES_DIR = Path(__file__).parent / "fixtures"


def test_dwd_station_request():
@@ -107,25 +105,17 @@ def test_dwd_station_request():
)


@pytest.mark.remote
def test_dwd_radolan_request():
with pytest.raises(ValueError):
DWDRadolanRequest(
time_resolution=TimeResolution.MINUTE_1, date_times=["2019-08-08 00:50:00"]
)

request = DWDRadolanRequest(
time_resolution=TimeResolution.HOURLY, date_times=["2019-08-08 00:50:00"]
)

assert request == DWDRadolanRequest(
TimeResolution.HOURLY,
[datetime(year=2019, month=8, day=8, hour=0, minute=50, second=0)],
def test_discover_climate_observations():
assert discover_climate_observations(
TimeResolution.DAILY, Parameter.CLIMATE_SUMMARY
) == json.dumps(
{
str(TimeResolution.DAILY): {
str(Parameter.CLIMATE_SUMMARY): [
str(PeriodType.HISTORICAL),
str(PeriodType.RECENT),
]
}
},
indent=4,
)

with Path(FIXTURES_DIR, "radolan_hourly_201908080050").open("rb") as f:
radolan_hourly = BytesIO(f.read())

radolan_hourly_test = next(request.collect_data())[1]

assert radolan_hourly.getvalue() == radolan_hourly_test.getvalue()
@@ -2,13 +2,12 @@
import pytest
import requests

from wetterdienst.enumerations.column_names_enumeration import DWDMetaColumns
from wetterdienst.indexing.file_index_creation import (
from wetterdienst.dwd.metadata.column_names import DWDMetaColumns
from wetterdienst.dwd.observations.fileindex import (
create_file_index_for_climate_observations,
create_file_list_for_climate_observations,
)
from wetterdienst.enumerations.parameter_enumeration import Parameter
from wetterdienst.enumerations.time_resolution_enumeration import TimeResolution
from wetterdienst.enumerations.period_type_enumeration import PeriodType
from wetterdienst import TimeResolution, Parameter, PeriodType


@pytest.mark.remote
@@ -33,3 +32,16 @@ def test_file_index_creation():
create_file_index_for_climate_observations(
Parameter.CLIMATE_SUMMARY, TimeResolution.MINUTE_1, PeriodType.HISTORICAL
)


def test_create_file_list_for_dwd_server():
remote_file_path = create_file_list_for_climate_observations(
station_ids=[1048],
parameter=Parameter.CLIMATE_SUMMARY,
time_resolution=TimeResolution.DAILY,
period_type=PeriodType.RECENT,
)
assert remote_file_path == [
"https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/"
"daily/kl/recent/tageswerte_KL_01048_akt.zip"
]
@@ -3,13 +3,13 @@
import pytest
from pandas import Timestamp

from wetterdienst.enumerations.column_names_enumeration import DWDMetaColumns
from wetterdienst.indexing.meta_index_creation import (
from wetterdienst.dwd.metadata.column_names import DWDMetaColumns
from wetterdienst.dwd.observations.metaindex import (
create_meta_index_for_climate_observations,
)
from wetterdienst.enumerations.parameter_enumeration import Parameter
from wetterdienst.enumerations.time_resolution_enumeration import TimeResolution
from wetterdienst.enumerations.period_type_enumeration import PeriodType
from wetterdienst.dwd.metadata.parameter import Parameter
from wetterdienst import TimeResolution
from wetterdienst.dwd.metadata.period_type import PeriodType


@pytest.mark.remote
@@ -4,19 +4,19 @@
from pathlib import Path
import pandas as pd

from wetterdienst.enumerations.parameter_enumeration import Parameter
from wetterdienst.enumerations.time_resolution_enumeration import TimeResolution
from wetterdienst.parsing_data.parse_data_from_files import (
from wetterdienst.dwd.metadata.parameter import Parameter
from wetterdienst import TimeResolution
from wetterdienst.dwd.observations.parser import (
parse_climate_observations_data,
)

FIXTURES_DIR = Path(__file__).parent.parent / "fixtures"
HERE = Path(__file__).parent


def test_parse_dwd_data():
filename = "tageswerte_KL_00001_19370101_19860630_hist.zip"

station_data_original = pd.read_json(FIXTURES_DIR / "FIXED_STATIONDATA.JSON")
station_data_original = pd.read_json(HERE / "FIXED_STATIONDATA.JSON")
file_in_bytes: Union[StringIO, BytesIO] = StringIO()
station_data_original.to_csv(file_in_bytes, sep=";")
file_in_bytes.seek(0)
