From 836a6a2a0aa318a84492a43a16070c3da33a9d17 Mon Sep 17 00:00:00 2001
From: Caleb Brown
Date: Wed, 14 Feb 2024 13:51:13 +1100
Subject: [PATCH] Adjust the query to keep the destination rather than replace. (#1013)

* Adjust the query to keep the destination rather than replace.

Signed-off-by: Caleb Brown

* Update BQ load comments.

Signed-off-by: Caleb Brown

---------

Signed-off-by: Caleb Brown
---
 scripts/bq_load.sh | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/scripts/bq_load.sh b/scripts/bq_load.sh
index 888c4d4f..a82e21c5 100755
--- a/scripts/bq_load.sh
+++ b/scripts/bq_load.sh
@@ -62,7 +62,21 @@ for bucket_prefix in `gsutil ls "$RESULT_BUCKET"`; do
   union="$union$subquery"
 done
 
-query="CREATE OR REPLACE TABLE \`$PROJECT_ID.$DEST_DATASET.$DEST_TABLE\` LIKE \`$PROJECT_ID.$LOAD_DATASET.$table_name\` PARTITION BY TIMESTAMP_TRUNC(CreatedTimestamp, DAY) OPTIONS(expiration_timestamp=NULL) AS $union;"
+# Query to populate the destination table.
+#
+# If the table does not exist it will be created. Keeping the table ensures
+# that breaking schema changes are not accidentally propagated to the
+# destination table.
+#
+# A transaction is used to keep the update atomic. It will also rollback the
+# TRUNCATE if the INSERT fails, such as when the schema has changed.
+query="
+CREATE TABLE IF NOT EXISTS \`$PROJECT_ID.$DEST_DATASET.$DEST_TABLE\` LIKE \`$PROJECT_ID.$LOAD_DATASET.$table_name\`
+  PARTITION BY TIMESTAMP_TRUNC(CreatedTimestamp, DAY) OPTIONS(expiration_timestamp=NULL);
+BEGIN TRANSACTION;
+TRUNCATE TABLE \`$PROJECT_ID.$DEST_DATASET.$DEST_TABLE\`;
+INSERT INTO \`$PROJECT_ID.$DEST_DATASET.$DEST_TABLE\` $union;
+COMMIT TRANSACTION;"
 
 echo "## Updating \`$PROJECT_ID.$DEST_DATASET.$DEST_TABLE\` from shards."
 echo "Executing query: '$query'"