From 81f35ab62298a2ec9fadeded82461b363b6401db Mon Sep 17 00:00:00 2001 From: Damon Date: Wed, 6 Nov 2024 12:06:52 -0800 Subject: [PATCH] Distroless python sdk (#32960) * Enable Python distroless container image variants * Fix missing entrypoint * Revert testing using validatescontainer.sh * Create validateDistrolessContainerTests * Refactor for reusable gradle methods * Revert back * Finalize gradle * Migrate distroless build to its own gradle task * Remove gradle distroless build task * Add base target * Build docker image directly in test * Revert back to using plugin --- sdks/python/container/Dockerfile | 26 ++++++++++- sdks/python/container/common.gradle | 9 +++- sdks/python/test-suites/dataflow/build.gradle | 6 +++ .../python/test-suites/dataflow/common.gradle | 45 +++++++++++++++++++ sdks/python/test-suites/gradle.properties | 3 ++ 5 files changed, 87 insertions(+), 2 deletions(-) diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile index 7bea6229668f..f3d22a4b5bc6 100644 --- a/sdks/python/container/Dockerfile +++ b/sdks/python/container/Dockerfile @@ -103,9 +103,33 @@ RUN if [ "$pull_licenses" = "true" ] ; then \ python /tmp/license_scripts/pull_licenses_py.py ; \ fi -FROM beam +FROM beam as base ARG pull_licenses COPY --from=third_party_licenses /opt/apache/beam/third_party_licenses /opt/apache/beam/third_party_licenses RUN if [ "$pull_licenses" != "true" ] ; then \ rm -rf /opt/apache/beam/third_party_licenses ; \ fi + +ARG TARGETARCH +FROM gcr.io/distroless/python3-debian12:latest-${TARGETARCH} as distroless +ARG py_version + +# Contains header files needed by the Python interpreter. +COPY --from=base /usr/local/include /usr/local/include + +# Contains the Python interpreter executables. +COPY --from=base /usr/local/bin /usr/local/bin + +# Contains the Python library dependencies. +COPY --from=base /usr/local/lib /usr/local/lib + +# Python standard library modules. 
+COPY --from=base /usr/lib/python${py_version} /usr/lib/python${py_version} + +# Contains the boot entrypoint and related files such as licenses. +COPY --from=base /opt /opt + +ENV PATH "$PATH:/usr/local/bin" + +# Although ENTRYPOINT is set above, it must be set again here because this stage derives from a different base image. +ENTRYPOINT ["/opt/apache/beam/boot"] diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle index 0175778a6301..885662362894 100644 --- a/sdks/python/container/common.gradle +++ b/sdks/python/container/common.gradle @@ -71,10 +71,16 @@ def copyLauncherDependencies = tasks.register("copyLauncherDependencies", Copy) } def pushContainers = project.rootProject.hasProperty(["isRelease"]) || project.rootProject.hasProperty("push-containers") +def baseBuildTarget = 'base' +def buildTarget = project.findProperty('container-build-target') ?: 'base' +var imageName = project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk" +if (buildTarget != baseBuildTarget) { + imageName += "_${buildTarget}" +} docker { name containerImageName( - name: project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk", + name: imageName, root: project.rootProject.hasProperty(["docker-repository-root"]) ? 
project.rootProject["docker-repository-root"] : project.docker_image_default_repo_root, @@ -90,6 +96,7 @@ docker { platform(*project.containerPlatforms()) load project.useBuildx() && !pushContainers push pushContainers + target buildTarget } dockerPrepare.dependsOn copyLauncherDependencies diff --git a/sdks/python/test-suites/dataflow/build.gradle b/sdks/python/test-suites/dataflow/build.gradle index 04a79683fd36..4500b395b0a6 100644 --- a/sdks/python/test-suites/dataflow/build.gradle +++ b/sdks/python/test-suites/dataflow/build.gradle @@ -60,6 +60,12 @@ task validatesContainerTests { } } +task validatesDistrolessContainerTests { + getVersionsAsList('distroless_python_versions').each { + dependsOn.add(":sdks:python:test-suites:dataflow:py${getVersionSuffix(it)}:validatesDistrolessContainer") + } +} + task examplesPostCommit { getVersionsAsList('dataflow_examples_postcommit_py_versions').each { dependsOn.add(":sdks:python:test-suites:dataflow:py${getVersionSuffix(it)}:examples") diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index 71d44652bc7e..cd0db4a62f77 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -380,6 +380,51 @@ task validatesContainer() { } } +/** + * Validates the distroless (https://github.com/GoogleContainerTools/distroless) variant of the Python SDK container + * image (sdks/python/container/Dockerfile). + * To test a single version of Python: + * ./gradlew :sdks:python:test-suites:dataflow:py311:validatesDistrolessContainer + * See https://cwiki.apache.org/confluence/display/BEAM/Python+Tips#PythonTips-VirtualEnvironmentSetup + * for more information on setting up different Python versions. 
+ */ +task validatesDistrolessContainer() { + def pyversion = "${project.ext.pythonVersion.replace('.', '')}" + def buildTarget = 'distroless' + def repository = "us.gcr.io/apache-beam-testing/${System.getenv('USER')}" + def tag = java.time.Instant.now().getEpochSecond() + def imageURL = "${repository}/beam_python${project.ext.pythonVersion}_sdk_${buildTarget}:${tag}" + project.rootProject.ext['docker-repository-root'] = repository + project.rootProject.ext['container-build-target'] = buildTarget + project.rootProject.ext['docker-tag'] = tag + if (project.rootProject.hasProperty('dry-run')) { + println "Running in dry run mode: imageURL: ${imageURL}, pyversion: ${pyversion}, buildTarget: ${buildTarget}, repository: ${repository}, tag: ${tag}, envdir: ${envdir}" + return + } + dependsOn 'initializeForDataflowJob' + dependsOn ":sdks:python:container:py${pyversion}:docker" + dependsOn ":sdks:python:container:py${pyversion}:dockerPush" + def testTarget = "apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it" + def argMap = [ + "output": "gs://temp-storage-for-end-to-end-tests/py-it-cloud/output", + "project": "apache-beam-testing", + "region": "us-central1", + "runner": "TestDataflowRunner", + "sdk_container_image": "${imageURL}", + "sdk_location": "container", + "staging_location": "gs://temp-storage-for-end-to-end-tests/staging-it", + "temp_location": "gs://temp-storage-for-end-to-end-tests/temp-it", + ] + def cmdArgs = mapToArgString(argMap) + doLast { + exec { + workingDir = "${rootDir}/sdks/python" + executable 'sh' + args '-c', ". 
${envdir}/bin/activate && pytest ${testTarget} --test-pipeline-options=\"${cmdArgs}\"" + } + } +} + task validatesContainerARM() { def pyversion = "${project.ext.pythonVersion.replace('.', '')}" dependsOn 'initializeForDataflowJob' diff --git a/sdks/python/test-suites/gradle.properties b/sdks/python/test-suites/gradle.properties index d027cd3144d3..08266c4b0dd5 100644 --- a/sdks/python/test-suites/gradle.properties +++ b/sdks/python/test-suites/gradle.properties @@ -54,3 +54,6 @@ prism_examples_postcommit_py_versions=3.9,3.12 # cross language postcommit python test suites cross_language_validates_py_versions=3.9,3.12 + +# Python versions to support distroless variants +distroless_python_versions=3.9,3.10,3.11,3.12