Merge branch 'main' into dependabot/github_actions/actions-minor-updates-93a8451e6e
maxfisher-g authored Feb 7, 2024
2 parents f4c5e73 + 6b875a6 commit 63ff084
Showing 5 changed files with 87 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/osv-scanner-pr.yml
@@ -16,4 +16,4 @@ permissions:

jobs:
  scan-pr:
-    uses: "google/osv-scanner/.github/workflows/osv-scanner-reusable-pr.yml@main"
+    uses: "google/osv-scanner-action/.github/workflows/osv-scanner-reusable-pr.yml@v1.6.2-beta1"
2 changes: 1 addition & 1 deletion .github/workflows/osv-scanner-scheduled.yml
@@ -15,4 +15,4 @@ permissions:

jobs:
  scan-scheduled:
-    uses: "google/osv-scanner/.github/workflows/osv-scanner-reusable.yml@main"
+    uses: "google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@v1.6.2-beta1"
15 changes: 15 additions & 0 deletions infra/cloudbuild/dynamic_loader/cloudbuild.yaml
@@ -0,0 +1,15 @@
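# Cloud Build job that runs scripts/bq_load.sh to load analysis results into BigQuery.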
steps:
- name: gcr.io/google.com/cloudsdktool/cloud-sdk
  env:
  - 'PROJECT_ID=ossf-malware-analysis'
  - 'LOAD_DATASET=testing' # TODO: change after testing is completed
  - 'LOAD_TABLE_PREFIX=merge_'
  - 'DEST_DATASET=testing' # TODO: change after testing is completed
  - 'DEST_TABLE=analysis'
  - 'RESULT_BUCKET=gs://ossf-malware-analysis-results'
  - 'SCHEMA_FILE=function/loader/dynamic-analysis-schema.json'
  entrypoint: '/bin/bash'
  args: ['scripts/bq_load.sh']
timeout: 43200s # 12 hours
options:
  logging: CLOUD_LOGGING_ONLY
File renamed without changes.
70 changes: 70 additions & 0 deletions scripts/bq_load.sh
@@ -0,0 +1,70 @@
#!/bin/bash
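# Load analysis results from the GCS results bucket into per-prefix BigQuery
# shard tables, then merge all shards into a single destination table.
# All configuration is passed in via environment variables; see
# infra/cloudbuild/dynamic_loader/cloudbuild.yaml.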

if [ -z "$PROJECT_ID" ]; then
echo "PROJECT_ID must be set"
exit 1
fi

if [ -z "$LOAD_DATASET" ]; then
echo "LOAD_DATASET must be set"
exit 1
fi

if [ -z "$LOAD_TABLE_PREFIX" ]; then
echo "LOAD_TABLE_PREFIX must be set"
exit 1
fi

if [ -z "$DEST_DATASET" ]; then
echo "DEST_DATASET must be set"
exit 1
fi

if [ -z "$DEST_TABLE" ]; then
echo "DEST_TABLE must be set"
exit 1
fi

if [ -z "$RESULT_BUCKET" ]; then
echo "RESULT_BUCKET must be set"
exit 1
fi

if [ -z "$SCHEMA_FILE" ]; then
echo "SCHEMA_FILE must be set"
exit 1
fi

union=""

# Load each top-level prefix in the results bucket into its own shard table,
# building up a UNION ALL subquery over all shards as we go.
for bucket_prefix in `gsutil ls "$RESULT_BUCKET"`; do
  prefix=`echo "$bucket_prefix" | sed "s|$RESULT_BUCKET/\([^\]*\)/|\1|g"`
  clean_prefix=`echo "$prefix" | tr -c -d "[:alnum:]"`
  table_name="$LOAD_TABLE_PREFIX$clean_prefix"

  echo "## Loading $bucket_prefix into \`$PROJECT_ID.$LOAD_DATASET.$table_name\`."
  # Load the newline-delimited JSON results under this prefix into a
  # day-partitioned shard table, replacing any previous contents.
  bq load \
    --headless \
    --project_id="$PROJECT_ID" \
    --dataset_id="$LOAD_DATASET" \
    --replace \
    --time_partitioning_type="DAY" \
    --time_partitioning_field="CreatedTimestamp" \
    --source_format="NEWLINE_DELIMITED_JSON" \
    --max_bad_records=10000 \
    "$table_name" "$bucket_prefix*" "$SCHEMA_FILE"

  # Construct a UNION query for joining the prefix shards together
  subquery="SELECT * FROM \`$PROJECT_ID.$LOAD_DATASET.$table_name\`"
  if [ -n "$union" ]; then
    union="$union UNION ALL "
  fi
  union="$union$subquery"
done

# Replace the destination table with the union of every shard table, copying the
# schema of the last shard and partitioning by day on CreatedTimestamp.
query="CREATE OR REPLACE TABLE \`$PROJECT_ID.$DEST_DATASET.$DEST_TABLE\` LIKE \`$PROJECT_ID.$LOAD_DATASET.$table_name\` PARTITION BY TIMESTAMP_TRUNC(CreatedTimestamp, DAY) OPTIONS(expiration_timestamp=NULL) AS $union;"

echo "## Updating \`$PROJECT_ID.$DEST_DATASET.$DEST_TABLE\` from shards."
echo "Executing query: '$query'"

bq query --headless --nouse_legacy_sql --project_id="$PROJECT_ID" "$query"
