Merge pull request #33098 from jrmccluskey/dataframesUpdate #640
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Licensed to the Apache Software Foundation (ASF) under one | |
# or more contributor license agreements. See the NOTICE file | |
# distributed with this work for additional information | |
# regarding copyright ownership. The ASF licenses this file | |
# to you under the Apache License, Version 2.0 (the | |
# "License"); you may not use this file except in compliance | |
# with the License. You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, | |
# software distributed under the License is distributed on an | |
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
# KIND, either express or implied. See the License for the | |
# specific language governing permissions and limitations | |
# under the License. | |
# To learn more about GitHub Actions in Apache Beam check the CI.md | |
name: Build python source distribution and wheels | |
on: | |
schedule: | |
- cron: '10 2 * * *' | |
push: | |
branches: ['master', 'release-*'] | |
tags: 'v*' | |
pull_request: | |
branches: ['master', 'release-*'] | |
tags: 'v*' | |
paths: ['sdks/python/**', 'model/**', 'release/**'] | |
workflow_dispatch: | |
# This allows a subsequently queued workflow run to interrupt previous runs | |
concurrency: | |
group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login}}' | |
cancel-in-progress: true | |
env: | |
GCP_PATH: "gs://${{ secrets.GCP_PYTHON_WHEELS_BUCKET }}/${GITHUB_REF##*/}/${GITHUB_SHA}-${GITHUB_RUN_ID}/" | |
jobs: | |
check_env_variables: | |
timeout-minutes: 5 | |
name: "Check environment variables" | |
runs-on: [self-hosted, ubuntu-20.04, main] | |
env: | |
EVENT_NAME: ${{ github.event_name }} | |
# Keep in sync with py_version matrix value below - if changed, change that as well. | |
PY_VERSIONS_FULL: "cp39-* cp310-* cp311-* cp312-*" | |
outputs: | |
gcp-variables-set: ${{ steps.check_gcp_variables.outputs.gcp-variables-set }} | |
py-versions-full: ${{ steps.set-py-versions.outputs.py-versions-full }} | |
py-versions-test: ${{ steps.set-py-versions.outputs.py-versions-test }} | |
steps: | |
- uses: actions/checkout@v4 | |
- name: "Check are GCP variables set" | |
run: "./scripts/ci/ci_check_are_gcp_variables_set.sh" | |
id: check_gcp_variables | |
env: | |
GCP_SA_EMAIL: "not used by self hosted runner" | |
GCP_SA_KEY: "not used by self hosted runner" | |
GCP_PYTHON_WHEELS_BUCKET: ${{ secrets.GCP_PYTHON_WHEELS_BUCKET }} | |
GCP_PROJECT_ID: "not-needed-here" | |
GCP_REGION: "not-needed-here" | |
GCP_TESTING_BUCKET: "not-needed-here" | |
- name: Set Python Versions for different environments | |
id: set-py-versions | |
run: | | |
set -xeu | |
if [ $EVENT_NAME == "pull_request" ]; then | |
# run highest supported version on pull request. | |
echo "py-versions-test=${PY_VERSIONS_FULL##* }" >> $GITHUB_OUTPUT | |
else | |
# run full version for push and cron jobs. | |
echo "py-versions-test=$PY_VERSIONS_FULL" >> $GITHUB_OUTPUT | |
fi | |
# Output full set of versions so that we can test all languages on pull requests for certain platforms. | |
echo "py-versions-full=$PY_VERSIONS_FULL" >> $GITHUB_OUTPUT | |
build_source: | |
runs-on: [self-hosted, ubuntu-20.04, main] | |
name: Build python source distribution | |
outputs: | |
is_rc: ${{ steps.is_rc.outputs.is_rc }} | |
rc_num: ${{ steps.get_rc_version.outputs.RC_NUM }} | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install python | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.9 | |
- name: Get tag | |
id: get_tag | |
run: | | |
echo "TAG=${GITHUB_REF#refs/*/}" >> $GITHUB_OUTPUT | |
- name: Check whether an -RC tag was applied to the commit. | |
id: is_rc | |
run: | | |
echo ${{ steps.get_tag.outputs.TAG }} > temp | |
OUTPUT=$( if grep -e '-RC.' -q temp; then echo 1; else echo 0; fi) | |
echo "is_rc=$OUTPUT" >> $GITHUB_OUTPUT | |
- name: Get RELEASE_VERSION and RC_NUM | |
if: steps.is_rc.outputs.is_rc == 1 | |
id: get_rc_version | |
run: | | |
RC_NUM=$(sed -n "s/^.*-RC\([0-9]*\)/\1/p" temp) | |
RELEASE_VERSION=$(sed -n "s/^v\(.*\)-RC[0-9]/\1/p" temp) | |
echo "RC_NUM=$RC_NUM" >> $GITHUB_OUTPUT | |
echo "RELEASE_VERSION=$RELEASE_VERSION" >> $GITHUB_OUTPUT | |
- name: Build source | |
working-directory: ./sdks/python | |
run: pip install -U build && python -m build --sdist | |
- name: Add checksums | |
working-directory: ./sdks/python/dist | |
run: | | |
file=$(ls | grep .tar.gz | head -n 1) | |
sha512sum $file > ${file}.sha512 | |
- name: Unzip source | |
working-directory: ./sdks/python | |
run: tar -xzvf dist/$(ls dist | grep .tar.gz | head -n 1) | |
- name: Rename source directory | |
working-directory: ./sdks/python | |
# https://github.com/pypa/setuptools/issues/4300 changed naming. Match both old and new names. | |
run: mv $(ls | grep "apache-beam-\|apache_beam-") apache-beam-source | |
- name: Upload source as artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
name: source | |
path: sdks/python/apache-beam-source | |
- name: Upload compressed sources as artifacts | |
uses: actions/upload-artifact@v4 | |
with: | |
name: source_zip | |
path: sdks/python/dist | |
- name: Clear dist | |
if: steps.is_rc.outputs.is_rc == 1 | |
working-directory: ./sdks/python | |
run: | | |
rm -r ./dist | |
rm -rd apache-beam-source | |
- name: Rewrite SDK version to include RC number | |
if: steps.is_rc.outputs.is_rc == 1 | |
working-directory: ./sdks/python | |
run: | | |
RELEASE_VERSION=${{ steps.get_rc_version.outputs.RELEASE_VERSION }} | |
RC_NUM=${{ steps.get_rc_version.outputs.RC_NUM }} | |
sed -i -e "s/${RELEASE_VERSION}/${RELEASE_VERSION}rc${RC_NUM}/g" apache_beam/version.py | |
- name: Build RC source | |
if: steps.is_rc.outputs.is_rc == 1 | |
working-directory: ./sdks/python | |
run: pip install -U build && python -m build --sdist | |
- name: Add RC checksums | |
if: steps.is_rc.outputs.is_rc == 1 | |
working-directory: ./sdks/python/dist | |
run: | | |
file=$(ls | grep .tar.gz | head -n 1) | |
sha512sum $file > ${file}.sha512 | |
- name: Unzip RC source | |
if: steps.is_rc.outputs.is_rc == 1 | |
working-directory: ./sdks/python | |
run: tar -xzvf dist/$(ls dist | grep .tar.gz | head -n 1) | |
- name: Rename RC source directory | |
if: steps.is_rc.outputs.is_rc == 1 | |
working-directory: ./sdks/python | |
# https://github.com/pypa/setuptools/issues/4300 changed naming. Match both old and new names. | |
run: mv $(ls | grep "apache-beam-\|apache_beam-") apache-beam-source-rc | |
- name: Upload RC source as artifact | |
if: steps.is_rc.outputs.is_rc == 1 | |
uses: actions/upload-artifact@v4 | |
with: | |
name: source_rc${{ steps.get_rc_version.outputs.RC_NUM }} | |
path: sdks/python/apache-beam-source-rc | |
- name: Upload compressed RC sources as artifacts | |
if: steps.is_rc.outputs.is_rc == 1 | |
uses: actions/upload-artifact@v4 | |
with: | |
name: source_zip_rc${{ steps.get_rc_version.outputs.RC_NUM }} | |
path: sdks/python/dist | |
prepare_gcs: | |
name: Prepare GCS | |
needs: | |
- build_source | |
- check_env_variables | |
runs-on: [self-hosted, ubuntu-20.04, main] | |
if: needs.check_env_variables.outputs.gcp-variables-set == 'true' && github.event_name != 'pull_request' | |
steps: | |
- name: Remove existing files on GCS bucket | |
run: gsutil rm -r ${{ env.GCP_PATH }} || true | |
upload_source_to_gcs: | |
name: Upload python source distribution to GCS bucket | |
needs: | |
- prepare_gcs | |
- check_env_variables | |
runs-on: [self-hosted, ubuntu-20.04, main] | |
if: needs.check_env_variables.outputs.gcp-variables-set == 'true' | |
steps: | |
- name: Download compressed sources from artifacts | |
uses: actions/[email protected] | |
with: | |
name: source_zip | |
path: source/ | |
- name: Copy sources to GCS bucket | |
run: gsutil cp -r -a public-read source/* ${{ env.GCP_PATH }} | |
build_wheels: | |
name: Build python ${{matrix.py_version}} wheels on ${{matrix.os_python.arch}} for ${{ matrix.os_python.os }} | |
needs: | |
- check_env_variables | |
- build_source | |
env: | |
CIBW_ARCHS_LINUX: ${{matrix.os_python.arch}} | |
runs-on: ${{ matrix.os_python.runner }} | |
timeout-minutes: 480 | |
strategy: | |
matrix: | |
os_python: [ | |
{"os": "ubuntu-20.04", "runner": [self-hosted, ubuntu-20.04, main], "python": "${{ needs.check_env_variables.outputs.py-versions-full }}", arch: "auto" }, | |
# Temporarily pin to macos-13 because macos-latest breaks this build | |
# TODO(https://github.com/apache/beam/issues/31114) | |
{"os": "macos-13", "runner": "macos-13", "python": "${{ needs.check_env_variables.outputs.py-versions-test }}", arch: "auto" }, | |
{"os": "windows-latest", "runner": "windows-latest", "python": "${{ needs.check_env_variables.outputs.py-versions-test }}", arch: "auto" }, | |
{"os": "ubuntu-20.04", "runner": [self-hosted, ubuntu-20.04, main], "python": "${{ needs.check_env_variables.outputs.py-versions-test }}", arch: "aarch64" } | |
] | |
# Keep in sync (remove asterisks) with PY_VERSIONS_FULL env var above - if changed, change that as well. | |
py_version: ["cp39-", "cp310-", "cp311-", "cp312-"] | |
steps: | |
- name: Download python source distribution from artifacts | |
uses: actions/[email protected] | |
with: | |
name: source | |
path: apache-beam-source | |
- name: Download Python SDK RC source distribution from artifacts | |
if: ${{ needs.build_source.outputs.is_rc == 1 }} | |
uses: actions/[email protected] | |
with: | |
name: source_rc${{ needs.build_source.outputs.rc_num }} | |
path: apache-beam-source-rc | |
- name: Install Python | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.9 | |
- uses: docker/setup-qemu-action@v1 | |
if: ${{matrix.os_python.arch == 'aarch64'}} | |
name: Set up QEMU | |
- name: Install cibuildwheel | |
# note: sync cibuildwheel version with gradle task sdks:python:bdistPy* steps | |
run: pip install cibuildwheel==2.17.0 setuptools | |
- name: Build wheel | |
# Only build wheel if it is one of the target versions for this platform, otherwise no-op | |
if: ${{ contains(matrix.os_python.python, matrix.py_version) }} | |
working-directory: apache-beam-source | |
env: | |
CIBW_BUILD: ${{ matrix.py_version }}* | |
# TODO: https://github.com/apache/beam/issues/23048 | |
CIBW_SKIP: "*-musllinux_*" | |
CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy --config-settings=setup-args="-Dallow-noblas=true" && pip install --upgrade setuptools | |
run: cibuildwheel --print-build-identifiers && cibuildwheel --output-dir wheelhouse | |
shell: bash | |
- name: install sha512sum on MacOS | |
if: startsWith(matrix.os_python.os, 'macos') | |
run: brew install coreutils | |
- name: Add checksums | |
if: ${{ contains(matrix.os_python.python, matrix.py_version) }} | |
working-directory: apache-beam-source/wheelhouse/ | |
run: | | |
for file in *.whl; do | |
sha512sum $file > ${file}.sha512 | |
done | |
shell: bash | |
- name: Upload wheels as artifacts | |
if: ${{ contains(matrix.os_python.python, matrix.py_version) }} | |
uses: actions/upload-artifact@v4 | |
with: | |
name: wheelhouse-${{ matrix.py_version }}${{ matrix.os_python.os }}${{ (matrix.os_python.arch == 'aarch64' && '-aarch64') || '' }} | |
path: apache-beam-source/wheelhouse/ | |
- name: Build RC wheels | |
# Only build wheel if it is one of the target versions for this platform, otherwise no-op | |
if: ${{ needs.build_source.outputs.is_rc == 1 && contains(matrix.os_python.python, matrix.py_version) }} | |
working-directory: apache-beam-source-rc | |
env: | |
CIBW_BUILD: ${{ matrix.py_version }}* | |
# TODO: https://github.com/apache/beam/issues/23048 | |
CIBW_SKIP: "*-musllinux_*" | |
CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy --config-settings=setup-args="-Dallow-noblas=true" && pip install --upgrade setuptools | |
run: cibuildwheel --print-build-identifiers && cibuildwheel --output-dir wheelhouse | |
shell: bash | |
- name: Add RC checksums | |
if: ${{ needs.build_source.outputs.is_rc == 1 }} | |
working-directory: apache-beam-source-rc/wheelhouse/ | |
run: | | |
for file in *.whl; do | |
sha512sum $file > ${file}.sha512 | |
done | |
shell: bash | |
- name: Upload RC wheels as artifacts | |
if: ${{ needs.build_source.outputs.is_rc == 1 }} | |
uses: actions/upload-artifact@v4 | |
with: | |
name: wheelhouse-rc${{ needs.build_source.outputs.rc_num }}-${{ matrix.py_version }}${{ matrix.os_python.os }}${{ (matrix.os_python.arch == 'aarch64' && '-aarch64') || '' }} | |
path: apache-beam-source-rc/wheelhouse/ | |
upload_wheels_to_gcs: | |
name: Upload wheels to GCS bucket | |
needs: | |
- build_wheels | |
- check_env_variables | |
runs-on: [self-hosted, ubuntu-20.04, main] | |
if: needs.check_env_variables.outputs.gcp-variables-set == 'true' && github.event_name != 'pull_request' | |
steps: | |
- name: Download wheels from artifacts | |
uses: actions/download-artifact@v4 | |
with: | |
pattern: wheelhouse-* | |
merge-multiple: true | |
path: wheelhouse/ | |
- name: Copy wheels to GCS bucket | |
run: gsutil cp -r -a public-read wheelhouse/* ${{ env.GCP_PATH }} | |
- name: Create github action information file on GCS bucket | |
run: | | |
cat > github_action_info <<EOF | |
GITHUB_WORKFLOW=$GITHUB_WORKFLOW | |
GITHUB_RUN_ID=$GITHUB_RUN_ID | |
GITHUB_RUN_NUMBER=$GITHUB_RUN_NUMBER | |
GITHUB_ACTION=$GITHUB_ACTION | |
GITHUB_ACTOR=$GITHUB_ACTOR | |
GITHUB_REPOSITORY=$GITHUB_REPOSITORY | |
GITHUB_EVENT_NAME=$GITHUB_EVENT_NAME | |
GITHUB_SHA=$GITHUB_SHA | |
GITHUB_REF=$GITHUB_REF | |
# only for forked repositiories | |
GITHUB_HEAD_REF=$GITHUB_HEAD_REF | |
GITHUB_BASE_REF=$GITHUB_BASE_REF | |
EOF | |
echo $(cat github_action_info) | |
gsutil cp -a public-read github_action_info ${{ env.GCP_PATH }} | |
- name: Upload GitHub event file to GCS bucket | |
run: gsutil cp -a public-read ${GITHUB_EVENT_PATH} ${{ env.GCP_PATH }} | |
list_files_on_gcs: | |
name: List files on Google Cloud Storage Bucket | |
needs: | |
- upload_wheels_to_gcs | |
- check_env_variables | |
runs-on: [self-hosted, ubuntu-20.04, main] | |
if: needs.check_env_variables.outputs.gcp-variables-set == 'true' && github.event_name != 'pull_request' | |
steps: | |
- name: List file on Google Cloud Storage Bucket | |
run: gsutil ls "${{ env.GCP_PATH }}*" | |
branch_repo_nightly: | |
permissions: | |
contents: write | |
name: Branch repo nightly | |
needs: | |
- build_source | |
- build_wheels | |
runs-on: [self-hosted, ubuntu-20.04, main] | |
timeout-minutes: 60 | |
if: github.repository_owner == 'apache' && github.event_name == 'schedule' | |
steps: | |
- name: Checkout code on master branch | |
uses: actions/checkout@v4 | |
with: | |
persist-credentials: false | |
submodules: recursive | |
- name: Branch commit | |
run: | | |
BRANCH_NAME=${GITHUB_REF##*/} | |
echo "Updating nightly-${BRANCH_NAME}" | |
git branch -f nightly-${BRANCH_NAME} HEAD | |
- name: Push branch | |
uses: ./.github/actions/github-push-action | |
with: | |
github_token: ${{ secrets.GITHUB_TOKEN }} | |
force: true | |
branch: nightly-${{ github.ref }} |