Add --ramp-up-blocks parameter to the stream command to deal with OOM… #23
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
# CICD (Merge): on every push to main, find the dataset directories under
# airflow/dags/resources/stages/parse/table_definitions that were touched by
# the latest commit (HEAD~1..HEAD) and run the parse image once per directory.
name: CICD (Merge)

on:
  push:
    branches: [main]

jobs:
  # Produces the JSON array of changed dataset directories that drives the
  # matrix of the `run` job.
  get-updated-dirs:
    runs-on: ubuntu-latest
    outputs:
      dirs: ${{ steps.get-dirs.outputs.dirs }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          # Full history, so that HEAD~1 exists for the diff below.
          fetch-depth: 0

      - name: Get updated directories
        id: get-dirs
        run: |
          # Keep the first 7 path components of each changed file
          # (.../table_definitions/<dataset>) and drop adjacent duplicates.
          DIRS=$(git diff --name-only HEAD~1 HEAD |
            grep '^airflow/dags/resources/stages/parse/table_definitions' |
            cut -d"/" -f1-7 |
            uniq)
          echo "Updated directories: $DIRS"
          # -y keeps the install non-interactive.
          sudo apt-get install -y jq
          # $DIRS is deliberately unquoted: word splitting flattens the list
          # to one space-separated line, which jq turns into a JSON array
          # ('[]' when nothing matched).
          DIRS_JSON=$(echo $DIRS | tr '\n' ' ' | jq -R -s -c 'split(" ") | map(select(length > 0))')
          echo "dirs=$DIRS_JSON" >> "$GITHUB_OUTPUT"

  # Builds the parse image and runs it once for every changed dataset
  # directory.
  run:
    needs: [get-updated-dirs]
    # Skip the whole job when no table definitions changed.
    if: needs.get-updated-dirs.outputs.dirs != '[]'
    runs-on: ubuntu-latest
    strategy:
      matrix:
        dir: ${{ fromJson(needs.get-updated-dirs.outputs.dirs) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Build Docker image
        run: |
          docker build -t polygon-etl-parse:latest -f airflow/parse.Dockerfile airflow/.

      - name: Run Docker image
        # Expression values are handed to the script through the environment
        # instead of being expanded into the script text, so their content is
        # never parsed as shell code.
        env:
          SERVICE_ACCOUNT_PROD: ${{ secrets.SERVICE_ACCOUNT_PROD }}
          DATASET_FOLDER: ${{ matrix.dir }}
          PARSE_PROJECT: ${{ vars.PARSE_PROJECT }}
          PARSE_STATE_BUCKET: ${{ vars.PARSE_STATE_BUCKET }}
          PARSE_DESTINATION_DATASET_PROJECT_ID: ${{ vars.PARSE_DESTINATION_DATASET_PROJECT_ID }}
        run: |
          # Always delete the key file, even when docker run fails.
          trap 'rm -f ./credentials.json' EXIT
          # Strip the table_definitions prefix; what remains is the dataset
          # name.
          DATASET_NAME=${DATASET_FOLDER#airflow/dags/resources/stages/parse/table_definitions/}
          echo "$SERVICE_ACCOUNT_PROD" > ./credentials.json
          docker run \
            -v "$PWD:/app" \
            -e GOOGLE_APPLICATION_CREDENTIALS=/app/credentials.json \
            polygon-etl-parse:latest \
            --project "$PARSE_PROJECT" \
            --dataset_name "$DATASET_NAME" \
            --dataset_folder "/app/$DATASET_FOLDER" \
            --state_bucket "$PARSE_STATE_BUCKET" \
            --destination_dataset_project_id "$PARSE_DESTINATION_DATASET_PROJECT_ID"