Skip to content

Commit

Permalink
Use date field in traffic aggregation
Browse files Browse the repository at this point in the history
  • Loading branch information
kseebaldt committed Sep 20, 2022
1 parent a0f8ce6 commit 8f8fb12
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
2 changes: 1 addition & 1 deletion sample_pipelines/traffic/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def aggregate_incidents(paths, secrets, spark, glueContext):
"geojson",
h3_pyspark.h3_to_geo_boundary(F.col("h3_index"), F.lit(True)),
)
.withColumn("month", F.date_format("published_date", "yyyy-MM"))
.withColumn("month", F.trunc("published_date", "MM"))
.groupBy("month", "h3_index", "issue_reported")
.agg(F.count("*").alias("count"), F.first("geojson").alias("geojson"))
)
Expand Down
7 changes: 4 additions & 3 deletions tests/sample_pipelines/traffic/analytics_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from chispa.dataframe_comparer import assert_df_equality
from dateutil.parser import isoparse
from datetime import date
from pytest import fixture

from sample_pipelines.traffic.analytics import aggregate_incidents
Expand Down Expand Up @@ -63,9 +64,9 @@ def test_aggregation(paths, secrets, spark, glueContext, input_df):

expected = spark.createDataFrame(
[
("2018-06", "89489e3464fffff", "Incident 1", 2),
("2018-06", "89489e3464fffff", "Incident 2", 1),
("2018-07", "89489e3464fffff", "Incident 1", 1),
(date.fromisoformat("2018-06-01"), "89489e3464fffff", "Incident 1", 2),
(date.fromisoformat("2018-06-01"), "89489e3464fffff", "Incident 2", 1),
(date.fromisoformat("2018-07-01"), "89489e3464fffff", "Incident 1", 1),
],
["month", "h3_index", "issue_reported", "count"],
)
Expand Down

0 comments on commit 8f8fb12

Please sign in to comment.