forked from kmorenski/date_dimension
-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_date_dimension.py
executable file
·67 lines (57 loc) · 2.62 KB
/
generate_date_dimension.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python
import argparse
import datetime
from date_dimension import DateDimension
def iterate_calendar(start, end):
curr = start
one_day = datetime.timedelta(days=1)
while curr < end:
curr += one_day
yield curr
def generate_date_dimension(start, end):
e = {
'year_number_in_epoch': 0,
'half_number_in_epoch': 0,
'quarter_number_in_epoch': 0,
'month_number_in_epoch': 0,
'week_number_in_epoch': 0,
'day_number_in_epoch': 0,
}
prev_date_dim = DateDimension(start)
for d in iterate_calendar(start, end):
date_dim = DateDimension(d)
if date_dim.columns['year_key'] != prev_date_dim.columns['year_key']:
e['year_number_in_epoch'] += 1
if date_dim.columns['half_number_in_year'] != prev_date_dim.columns['half_number_in_year']:
e['half_number_in_epoch'] += 1
if date_dim.columns['quarter_number_in_year'] != prev_date_dim.columns['quarter_number_in_year']:
e['quarter_number_in_epoch'] += 1
if date_dim.columns['month_number_in_year'] != prev_date_dim.columns['month_number_in_year']:
e['month_number_in_epoch'] += 1
if date_dim.columns['week_number_in_year'] != prev_date_dim.columns['week_number_in_year']:
e['week_number_in_epoch'] += 1
if date_dim.columns['day_number_in_year'] != prev_date_dim.columns['day_number_in_year']:
e['day_number_in_epoch'] += 1
for k in e.keys():
date_dim.columns[k] = e[k]
prev_date_dim = date_dim
yield date_dim
def valid_date(s):
try:
dt = datetime.datetime.strptime(s, "%Y-%m-%d")
return dt.date()
except ValueError:
msg = "invalid date: '{0}'".format(s)
raise argparse.ArgumentTypeError(msg)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Generate a date dimension schema and insertion script.')
parser.add_argument('start_date', metavar='D', type=valid_date,
help='Epoch date for the generated dimension (yyyy-mm-dd format)')
parser.add_argument('-e', '--end-date', type=valid_date, default='2020-12-31',
help='End date for generated dimension (yyyy-mm-dd format)')
parser.add_argument('-o', '--output-file', type=argparse.FileType('w'), default='date_dimension_inserts.sql',
help='Output destination for the INSERT script')
args = parser.parse_args()
with args.output_file as out:
for dim in generate_date_dimension(args.start_date, args.end_date):
out.write(dim.generate_insert_statement())