Skip to content

Commit

Permalink
Distroless python sdk (apache#32960)
Browse files Browse the repository at this point in the history
* Enable Python distroless container image variants

* Fix missing entrypoint

* Revert testing using validatescontainer.sh

* Create validateDistrolessContainerTests

* Refactor for reusable gradle methods

* Revert back

* Finalize gradle

* Migrate distroless build to its own gradle task

* Remove gradle distroless build task

* Add base target

* Build docker image directly in test

* Revert back to using plugin
  • Loading branch information
damondouglas authored Nov 6, 2024
1 parent eeebae1 commit 81f35ab
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 2 deletions.
26 changes: 25 additions & 1 deletion sdks/python/container/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,33 @@ RUN if [ "$pull_licenses" = "true" ] ; then \
python /tmp/license_scripts/pull_licenses_py.py ; \
fi

FROM beam
FROM beam as base
ARG pull_licenses
COPY --from=third_party_licenses /opt/apache/beam/third_party_licenses /opt/apache/beam/third_party_licenses
RUN if [ "$pull_licenses" != "true" ] ; then \
rm -rf /opt/apache/beam/third_party_licenses ; \
fi

ARG TARGETARCH
# Distroless variant of the SDK image: starts from the distroless Python base image and copies the Python
# installation and the Beam files out of the `base` stage.
FROM gcr.io/distroless/python3-debian12:latest-${TARGETARCH} as distroless
# NOTE(review): py_version selects the standard library directory copied below; it is assumed to be supplied as a
# build argument - confirm against the top of this Dockerfile.
ARG py_version

# Contains header files needed by the Python interpreter.
COPY --from=base /usr/local/include /usr/local/include

# Contains the Python interpreter executables.
COPY --from=base /usr/local/bin /usr/local/bin

# Contains the Python library dependencies.
COPY --from=base /usr/local/lib /usr/local/lib

# Python standard library modules.
COPY --from=base /usr/lib/python${py_version} /usr/lib/python${py_version}

# Contains the boot entrypoint and related files such as licenses.
COPY --from=base /opt /opt

# Use the key=value form; the whitespace-separated `ENV key value` form is a deprecated legacy syntax.
ENV PATH="$PATH:/usr/local/bin"

# The ENTRYPOINT has to be declared again here: this stage is built from the distroless image rather than from
# `base`, so an ENTRYPOINT set in an earlier stage does not carry over.
ENTRYPOINT ["/opt/apache/beam/boot"]
9 changes: 8 additions & 1 deletion sdks/python/container/common.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,16 @@ def copyLauncherDependencies = tasks.register("copyLauncherDependencies", Copy)
}

def pushContainers = project.rootProject.hasProperty(["isRelease"]) || project.rootProject.hasProperty("push-containers")
// Name of the default Dockerfile target; images built from it keep the unsuffixed name.
def baseBuildTarget = 'base'
// Target to build; taken from the 'container-build-target' property when it is set.
def buildTarget = project.findProperty('container-build-target') ?: baseBuildTarget
// Any target other than the base one contributes a "_<target>" suffix to the image name.
def targetSuffix = (buildTarget == baseBuildTarget) ? '' : "_${buildTarget}"
def imageName = project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk" + targetSuffix

docker {
name containerImageName(
name: project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk",
name: imageName,
root: project.rootProject.hasProperty(["docker-repository-root"]) ?
project.rootProject["docker-repository-root"] :
project.docker_image_default_repo_root,
Expand All @@ -90,6 +96,7 @@ docker {
platform(*project.containerPlatforms())
load project.useBuildx() && !pushContainers
push pushContainers
target buildTarget
}

dockerPrepare.dependsOn copyLauncherDependencies
Expand Down
6 changes: 6 additions & 0 deletions sdks/python/test-suites/dataflow/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ task validatesContainerTests {
}
}

// Aggregate task: adds a dependency on the per-version validatesDistrolessContainer task for every entry
// returned by getVersionsAsList('distroless_python_versions').
task validatesDistrolessContainerTests {
  getVersionsAsList('distroless_python_versions').each { version ->
    def suffix = getVersionSuffix(version)
    dependsOn.add(":sdks:python:test-suites:dataflow:py${suffix}:validatesDistrolessContainer")
  }
}

task examplesPostCommit {
getVersionsAsList('dataflow_examples_postcommit_py_versions').each {
dependsOn.add(":sdks:python:test-suites:dataflow:py${getVersionSuffix(it)}:examples")
Expand Down
45 changes: 45 additions & 0 deletions sdks/python/test-suites/dataflow/common.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,51 @@ task validatesContainer() {
}
}

/**
 * Validates the distroless (https://github.com/GoogleContainerTools/distroless) variant of the Python SDK container
 * image (sdks/python/container/Dockerfile).
 * To test a single version of Python:
 *   ./gradlew :sdks:python:test-suites:dataflow:py311:validatesDistrolessContainer
 * See https://cwiki.apache.org/confluence/display/BEAM/Python+Tips#PythonTips-VirtualEnvironmentSetup
 * for more information on setting up different Python versions.
 *
 * Passing the 'dry-run' root project property prints the resolved settings and skips wiring the image build,
 * the push and the test run.
 */
task validatesDistrolessContainer() {
  def pyversion = "${project.ext.pythonVersion.replace('.', '')}"
  def buildTarget = 'distroless'
  // USER may be unset in some environments; fall back to the JVM's user.name so the repository path never
  // ends in the literal string "null".
  def userName = System.getenv('USER') ?: System.getProperty('user.name')
  def repository = "us.gcr.io/apache-beam-testing/${userName}"
  // Epoch seconds at configuration time, used as the image tag.
  def tag = java.time.Instant.now().getEpochSecond()
  def imageURL = "${repository}/beam_python${project.ext.pythonVersion}_sdk_${buildTarget}:${tag}"
  // NOTE(review): these root-project properties are assigned while this task is being configured, not when it
  // executes, so they are set for any build that configures this project - confirm that is intended.
  project.rootProject.ext['docker-repository-root'] = repository
  project.rootProject.ext['container-build-target'] = buildTarget
  project.rootProject.ext['docker-tag'] = tag
  if (project.rootProject.hasProperty('dry-run')) {
    println "Running in dry run mode: imageURL: ${imageURL}, pyversion: ${pyversion}, buildTarget: ${buildTarget}, repository: ${repository}, tag: ${tag}, envdir: ${envdir}"
    // Leave the task without dependencies or actions in dry-run mode.
    return
  }
  dependsOn 'initializeForDataflowJob'
  // The container project's docker and dockerPush tasks must run before the test is launched.
  dependsOn ":sdks:python:container:py${pyversion}:docker"
  dependsOn ":sdks:python:container:py${pyversion}:dockerPush"
  def testTarget = "apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it"
  def argMap = [
      "output": "gs://temp-storage-for-end-to-end-tests/py-it-cloud/output",
      "project": "apache-beam-testing",
      "region": "us-central1",
      "runner": "TestDataflowRunner",
      "sdk_container_image": "${imageURL}",
      "sdk_location": "container",
      "staging_location": "gs://temp-storage-for-end-to-end-tests/staging-it",
      "temp_location": "gs://temp-storage-for-end-to-end-tests/temp-it",
  ]
  def cmdArgs = mapToArgString(argMap)
  doLast {
    exec {
      workingDir = "${rootDir}/sdks/python"
      executable 'sh'
      // Activate the virtualenv at ${envdir}, then run the single wordcount integration test with the
      // pipeline options assembled above.
      args '-c', ". ${envdir}/bin/activate && pytest ${testTarget} --test-pipeline-options=\"${cmdArgs}\""
    }
  }
}

task validatesContainerARM() {
def pyversion = "${project.ext.pythonVersion.replace('.', '')}"
dependsOn 'initializeForDataflowJob'
Expand Down
3 changes: 3 additions & 0 deletions sdks/python/test-suites/gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,6 @@ prism_examples_postcommit_py_versions=3.9,3.12

# cross language postcommit python test suites
cross_language_validates_py_versions=3.9,3.12

# Python versions for which the distroless container variant is validated
distroless_python_versions=3.9,3.10,3.11,3.12

0 comments on commit 81f35ab

Please sign in to comment.