diff --git a/cf/requirements.txt b/cf/requirements.txt index 91b5c41..dfb7f72 100644 --- a/cf/requirements.txt +++ b/cf/requirements.txt @@ -3,4 +3,5 @@ google-cloud-storage~=1.35.0 google-cloud-bigquery~=3.13.0 google-cloud-pubsub~=2.2.0 google-cloud-bigquery-storage~=2.10.1 -pytz~=2022.7.1 \ No newline at end of file +pytz~=2022.7.1 +db_dtypes~=1.1.1 \ No newline at end of file diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index c2163b3..0d2ebe2 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -261,7 +261,7 @@ def flatten_ga_data_check_number_of_rows(self, dates_list=["20211201", "20211202 ) query_string_sharded = f""" - SELECT _TABLE_SUFFIX as event_date, count(*) nrow + SELECT event_date, count(*) nrow FROM `{self.ga_source.gcp_project}.{self.ga_source.dataset}.{table_type}_*` WHERE _TABLE_SUFFIX BETWEEN "{dates_list[0]}" AND "{dates_list[1]}" GROUP BY 1 @@ -275,10 +275,6 @@ def flatten_ga_data_check_number_of_rows(self, dates_list=["20211201", "20211202 ) ) - dataframe_sharded['event_date'] = pd.to_datetime(dataframe_sharded['event_date'], format='%Y%m%d') - - dataframe_sharded['event_date'] = dataframe_sharded['event_date'].dt.date - assert dataframe_sharded.equals(dataframe_partitioned)