From deb26a7fe1e36b23c42bddf92ba77ce5f800a2db Mon Sep 17 00:00:00 2001 From: ahmedabu98 Date: Fri, 22 Sep 2023 16:30:59 -0400 Subject: [PATCH] pass thru all timestamps; add explicit -1 timestamp test --- sdks/python/apache_beam/io/gcp/bigtableio.py | 7 +++--- .../apache_beam/io/gcp/bigtableio_it_test.py | 25 ++++++++++++++++--- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/sdks/python/apache_beam/io/gcp/bigtableio.py b/sdks/python/apache_beam/io/gcp/bigtableio.py index f402e6d7b393..f8534f38ddfc 100644 --- a/sdks/python/apache_beam/io/gcp/bigtableio.py +++ b/sdks/python/apache_beam/io/gcp/bigtableio.py @@ -252,11 +252,10 @@ def process(self, direct_row): "type": b'SetCell', "family_name": mutation.set_cell.family_name.encode('utf-8'), "column_qualifier": mutation.set_cell.column_qualifier, - "value": mutation.set_cell.value + "value": mutation.set_cell.value, + "timestamp_micros": struct.pack( + '>q', mutation.set_cell.timestamp_micros) } - micros = mutation.set_cell.timestamp_micros - if micros >= -1: - mutation_dict['timestamp_micros'] = struct.pack('>q', micros) elif mutation.__contains__("delete_from_column"): mutation_dict = { "type": b'DeleteFromColumn', diff --git a/sdks/python/apache_beam/io/gcp/bigtableio_it_test.py b/sdks/python/apache_beam/io/gcp/bigtableio_it_test.py index 9a54c50fcf19..794bbf1ce5ca 100644 --- a/sdks/python/apache_beam/io/gcp/bigtableio_it_test.py +++ b/sdks/python/apache_beam/io/gcp/bigtableio_it_test.py @@ -226,6 +226,7 @@ def test_set_mutation(self): # When setting this cell, we won't set a timestamp. We expect the timestamp # to default to -1, and Bigtable will set it to system time at insertion. row2_col1_no_timestamp = Cell(b'val2-2-notimestamp', time.time()) + row2_col1_neg1_timestamp = Cell(b'val2-2-neg1-timestamp', time.time()) # rows sent to write transform row1.set_cell( 'col_fam', b'col-1', row1_col1_cell.value, row1_col1_cell.timestamp) @@ -237,6 +238,8 @@ def test_set_mutation(self): 'col_fam', b'col-2', row2_col2_cell.value, row2_col2_cell.timestamp) # don't set a timestamp here. it should default to -1 row2.set_cell('col_fam', b'col-no-timestamp', row2_col1_no_timestamp.value) + row2.set_cell( + 'col_fam', b'col-neg1-timestamp', row2_col1_no_timestamp.value) self.run_pipeline([row1, row2]) @@ -254,18 +257,32 @@ def test_set_mutation(self): self.assertEqual( row2_col2_cell, actual_row2.find_cells('col_fam', b'col-2')[0]) - # check cells that don't have a timestamp set are handled properly: + # check cell that doesn't have a timestamp set is handled properly: self.assertEqual( row2_col1_no_timestamp.value, actual_row2.find_cells('col_fam', b'col-no-timestamp')[0].value) # Bigtable sets timestamp as insertion time, which is later than the # time.time() we set when creating this test case + cell_timestamp = actual_row2.find_cells('col_fam', + b'col-no-timestamp')[0].timestamp self.assertTrue( row2_col1_no_timestamp.timestamp < actual_row2.find_cells( 'col_fam', b'col-no-timestamp')[0].timestamp, - msg="Expected cell with unset timestamp to have ingestion time attached, " - f"but was {actual_row2.find_cells('col_fam', b'col-no-timestamp')[0].timestamp}" - ) + msg="Expected cell with unset timestamp to have ingestion time " + f"attached, but was {cell_timestamp}") + # check cell that has timestamp of `-1` is handled properly: + self.assertEqual( + row2_col1_neg1_timestamp.value, + actual_row2.find_cells('col_fam', b'col-neg1-timestamp')[0].value) + # Bigtable sets -1 timestamp as insertion time, which is later than the + # time.time() we set when creating this test case + cell_timestamp = actual_row2.find_cells('col_fam', + b'col-neg1-timestamp')[0].timestamp + self.assertTrue( + row2_col1_neg1_timestamp.timestamp < actual_row2.find_cells( + 'col_fam', b'col-neg1-timestamp')[0].timestamp, + msg="Expected cell with `-1` timestamp to have ingestion time " + f"attached, but was {cell_timestamp}") def test_delete_cells_mutation(self): col_fam = self.table.column_family('col_fam')