-
Notifications
You must be signed in to change notification settings - Fork 0
/
dwd_merge_weather_csv_with_station_pos_and_name.py
150 lines (130 loc) · 5.72 KB
/
dwd_merge_weather_csv_with_station_pos_and_name.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3
import csv
from datetime import datetime, timedelta
from glob import glob
from io import TextIOWrapper
from os import stat
from os.path import join
from zipfile import ZipFile
class Station:
    """One validity period of a station from a ``Metadaten_Geographie_*`` file.

    Instances are created empty and filled attribute by attribute by the
    merge script. ``start`` and ``end`` bound the period (inclusive) for which
    the position and name apply.
    """

    name: str
    start: datetime
    end: datetime
    # The coordinates and the height are stored exactly as read from the CSV
    # (text, possibly padded with blanks); the script strips them on output
    # and never converts them to numbers, hence ``str`` rather than ``float``.
    latitude: str
    longitude: str
    height: str
# Merge every DWD station archive in ``crawler/*.zip`` into a single CSV.
#
# For each archive the station metadata (name, position, height and validity
# period) is read from the ``Metadaten_Geographie_*`` member and joined to every
# daily row of the ``produkt_klima_tag`` member by measurement date. Rows whose
# date falls into no validity period are still written, with placeholder
# metadata and ``faulty_station_metadata`` set to True.
print("merge dwd_merge_weather_csv_with_station_pos_and_name")

# Descriptions of the RSKF (precipitation form) codes. "-999" marks codes that
# have no description. Built once because it is identical for every row.
RSKF_dict: dict = {
    0: "kein Niederschlag (konventionelle oder automatische Messung), entspricht WMO Code-Zahl 10",
    1: "nur Regen (in historischen Daten vor 1979)",
    2: "-999",
    3: "-999",
    4: "Form nicht bekannt, obwohl Niederschlag gemeldet",
    5: "-999",
    6: "nur Regen; flüssiger Niederschlag bei automatischen Stationen, entspricht WMO Code-Zahl 11",
    7: "nur Schnee; fester Niederschlag bei automatischen Stationen, entspricht WMO Code-Zahl 12",
    8: "Regen und Schnee (und/oder Schneeregen); flüssiger und fester Niederschlag bei automatischen Stationen, entspricht WMO Code-Zahl 13",
    9: "fehlender Wert oder Niederschlagsform nicht feststellbar bei automatischer Messung, entspricht WMO Code-Zahl 15",
    -999: "-999",
}

# NOTE(review): no explicit encoding is passed, so the platform default is used
# for the umlauts in the header and in the data - confirm this is intended.
with open('dwd_merge_weather_csv_with_station_pos_and_name.csv', 'w', newline='') as export:
    fieldnames = [
        'stationid',
        'stationname',
        'date',
        'latitude',
        'longitude',
        'height',
        'quality_wind',
        'wind_max',
        'wind_mean',
        'quality_rest',
        'niederschlagshöhe',
        'niederschlagsform_id',
        'niederschlagsform_description',
        'sonnenscheindauer',
        'schneehoehe',
        'bedeckungsgrad_mean',
        'dampfdruck_mean',
        'luftdruck_mean',
        'temperatur_mean',
        'relativen_feuchte_mean',
        'temperatur_in_hoehe_2m_max',
        'temperatur_in_hoehe_2m_min',
        'temperatur_in_hoehe_5cm_min',
        'faulty_station_metadata'
    ]
    writer = csv.DictWriter(export, fieldnames=fieldnames)
    writer.writeheader()

    for file in glob("crawler/*.zip"):
        # Diagnostics are collected here and only printed once a row cannot be
        # matched to a station, so clean archives stay quiet.
        err_str = ""
        print(f"merging {file}")

        # The context managers guarantee the archive and its members are
        # closed even if parsing fails half way through.
        with ZipFile(file, 'r') as archive:
            namelist = archive.namelist()

            # get relevant meta data
            geography_member = next(
                x for x in namelist if x.startswith("Metadaten_Geographie_"))
            station_list = []
            err_str += " Stations in this dataset:\n"
            with TextIOWrapper(archive.open(geography_member), 'latin-1') as geography_text:
                for row in csv.DictReader(geography_text, delimiter=';'):
                    station = Station()
                    station.name = row["Stationsname"]
                    station.latitude = row["Geogr.Breite"]
                    station.longitude = row["Geogr.Laenge"]
                    station.start = datetime.strptime(
                        row["von_datum"].strip(), r"%Y%m%d")
                    # A blank end date means the period is still open; test
                    # with strip() so the amount of padding does not matter.
                    end_text = row["bis_datum"].strip()
                    if end_text:
                        station.end = datetime.strptime(end_text, r"%Y%m%d")
                    else:
                        station.end = datetime(9999, month=12, day=31)
                    station.height = row["Stationshoehe"]
                    err_str += f" Name: {station.name} {station.start.strftime(r'%d.%m.%Y')}-{station.end.strftime(r'%d.%m.%Y')} | ({station.latitude}, {station.longitude}) {station.height}\n"
                    station_list.append(station)

            data_member = next(x for x in namelist if "produkt_klima_tag" in x)
            with TextIOWrapper(archive.open(data_member), 'latin-1') as data_text:
                csv_reader = csv.DictReader(data_text, delimiter=';')
                # Some column headers are padded with blanks; strip them once
                # so the lookups below do not depend on the exact padding.
                csv_reader.fieldnames = [
                    name.strip() for name in csv_reader.fieldnames or []]
                err_str += " Parsing rows...\n"

                for row_counter, row in enumerate(csv_reader, start=1):
                    faulty = False
                    date = datetime.strptime(
                        row["MESS_DATUM"].strip(), r"%Y%m%d")
                    try:
                        station = next(
                            s for s in station_list if s.start <= date <= s.end)
                    except StopIteration:
                        err_str += f" Error - No Station found for time {date.strftime(r'%d.%m.%Y')} (row {row_counter}) added with faulty flag\n"
                        # Use placeholder metadata for this row instead of
                        # silently reusing whichever station was bound last.
                        station = Station()
                        station.latitude = "-999"
                        station.longitude = "-999"
                        station.height = "-999"
                        station.name = "Unknown"
                        faulty = True
                        print(err_str, end='')
                        err_str = ""

                    # Translate the precipitation form code; anything that is
                    # not a known integer code gets the "-999" marker.
                    rskf_code = row["RSKF"].strip()
                    try:
                        rskf_description = RSKF_dict.get(int(rskf_code), "-999")
                    except ValueError:
                        rskf_description = "-999"

                    writer.writerow({
                        'stationid': row["STATIONS_ID"].strip(),
                        'stationname': station.name.strip(),
                        'date': row["MESS_DATUM"].strip(),
                        'latitude': station.latitude.strip(),
                        'longitude': station.longitude.strip(),
                        'height': station.height.strip(),
                        # NOTE(review): QN_3 / QN_4 are taken to be the quality
                        # levels of the wind columns and of the remaining
                        # columns, per the DWD dataset description - confirm.
                        # A missing column yields an empty cell.
                        'quality_wind': (row.get("QN_3") or "").strip(),
                        'wind_max': row["FX"].strip(),
                        'wind_mean': row["FM"].strip(),
                        'quality_rest': (row.get("QN_4") or "").strip(),
                        'niederschlagshöhe': row["RSK"].strip(),
                        'niederschlagsform_id': rskf_code,
                        'niederschlagsform_description': rskf_description,
                        'sonnenscheindauer': row["SDK"].strip(),
                        'schneehoehe': row["SHK_TAG"].strip(),
                        'bedeckungsgrad_mean': row["NM"].strip(),
                        'dampfdruck_mean': row["VPM"].strip(),
                        'luftdruck_mean': row["PM"].strip(),
                        'temperatur_mean': row["TMK"].strip(),
                        'relativen_feuchte_mean': row["UPM"].strip(),
                        'temperatur_in_hoehe_2m_max': row["TXK"].strip(),
                        'temperatur_in_hoehe_2m_min': row["TNK"].strip(),
                        'temperatur_in_hoehe_5cm_min': row["TGK"].strip(),
                        'faulty_station_metadata': faulty
                    })

# Keep the console window open until the user confirms.
print("finished press enter to exit...")
input()