From df28e891c4374f7eac98cc6a4892b6e6c35a43f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Tue, 29 Oct 2024 18:02:11 +0100 Subject: [PATCH] feat(component): execute in a virtual env (#11326) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit introduces the ability to run the component inside a virtual environment, which is particularly useful when the Python system site package directory is read-only, causing pip to fail during the installation of kfp and other dependencies. To bypass this issue, a writable temporary directory is selected to create a virtual environment that inherits the global system site packages. The entire workflow is then executed from within this virtual environment. Furthermore, a new field, `use_venv`, has been added to the component decorator, with a default value of `False`. Signed-off-by: Sébastien Han --- components/google-cloud/RELEASE.md | 1 + sdk/python/kfp/dsl/component_decorator.py | 12 +++- sdk/python/kfp/dsl/component_factory.py | 15 +++++ .../component_with_pip_install_in_venv.py | 34 ++++++++++++ .../component_with_pip_install_in_venv.yaml | 55 +++++++++++++++++++ 5 files changed, 114 insertions(+), 3 deletions(-) create mode 100644 sdk/python/test_data/components/component_with_pip_install_in_venv.py create mode 100644 sdk/python/test_data/components/component_with_pip_install_in_venv.yaml diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 3cc17345d65..b110648af31 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -2,6 +2,7 @@ * Remove default prediction column names in `v1.model_evaluation.regression_component` component to fix pipeline errors when using bigquery data source. * Add reservation_affinition support in `v1.create_custom_training_job_from_component`. * Deprecate `preview.custom_job` module. 
+* Add a new `use_venv` field to the component decorator, enabling the component to run inside a virtual environment. ## Release 2.17.0 * Fix Gemini batch prediction support to `v1.model_evaluation.autosxs_pipeline` after output schema change. diff --git a/sdk/python/kfp/dsl/component_decorator.py b/sdk/python/kfp/dsl/component_decorator.py index 6e0c70679d9..f9392c14424 100644 --- a/sdk/python/kfp/dsl/component_decorator.py +++ b/sdk/python/kfp/dsl/component_decorator.py @@ -28,7 +28,8 @@ def component(func: Optional[Callable] = None, output_component_file: Optional[str] = None, install_kfp_package: bool = True, kfp_package_path: Optional[str] = None, - pip_trusted_hosts: Optional[List[str]] = None): + pip_trusted_hosts: Optional[List[str]] = None, + use_venv: bool = False): """Decorator for Python-function based components. A KFP component can either be a lightweight component or a containerized @@ -75,6 +76,9 @@ def component(func: Optional[Callable] = None, as that used when this component was created. Component authors can choose to override this to point to a GitHub pull request or other pip-compatible package server. + use_venv: Whether to execute the component inside a Python virtual environment. + The environment is created in a writable temporary directory and inherits the system site packages. + This is useful in restricted environments where the system site-packages directory is read-only and pip would otherwise fail to install kfp and other dependencies. Returns: A component task factory that can be used in pipeline definitions. 
@@ -116,7 +120,8 @@ def pipeline(): output_component_file=output_component_file, install_kfp_package=install_kfp_package, kfp_package_path=kfp_package_path, - pip_trusted_hosts=pip_trusted_hosts) + pip_trusted_hosts=pip_trusted_hosts, + use_venv=use_venv) return component_factory.create_component_from_func( func, @@ -127,4 +132,5 @@ def pipeline(): output_component_file=output_component_file, install_kfp_package=install_kfp_package, kfp_package_path=kfp_package_path, - pip_trusted_hosts=pip_trusted_hosts) + pip_trusted_hosts=pip_trusted_hosts, + use_venv=use_venv) diff --git a/sdk/python/kfp/dsl/component_factory.py b/sdk/python/kfp/dsl/component_factory.py index c649424bac3..443728f4d2f 100644 --- a/sdk/python/kfp/dsl/component_factory.py +++ b/sdk/python/kfp/dsl/component_factory.py @@ -57,6 +57,7 @@ class ComponentInfo(): packages_to_install: Optional[List[str]] = None pip_index_urls: Optional[List[str]] = None pip_trusted_hosts: Optional[List[str]] = None + use_venv: bool = False # A map from function_name to components. This is always populated when a @@ -132,6 +133,15 @@ def make_pip_install_command( PIP_DISABLE_PIP_VERSION_CHECK=1 {pip_install_commands} && "$0" "$@" ''' +# Creates and activates a virtual environment in a temporary directory. +# The environment inherits the system site packages. +_use_venv_script_template = ''' +export PIP_DISABLE_PIP_VERSION_CHECK=1 +tmp=$(mktemp -d) +python3 -m venv "$tmp/venv" --system-site-packages +. 
"$tmp/venv/bin/activate" +''' + def _get_packages_to_install_command( kfp_package_path: Optional[str] = None, @@ -140,6 +150,7 @@ def _get_packages_to_install_command( install_kfp_package: bool = True, target_image: Optional[str] = None, pip_trusted_hosts: Optional[List[str]] = None, + use_venv: bool = False, ) -> List[str]: packages_to_install = packages_to_install or [] kfp_in_user_pkgs = any(pkg.startswith('kfp') for pkg in packages_to_install) @@ -154,6 +165,8 @@ def _get_packages_to_install_command( pip_trusted_hosts) if inject_kfp_install: + if use_venv: + pip_install_strings.append(_use_venv_script_template) if kfp_package_path: kfp_pip_install_command = make_pip_install_command( install_parts=[kfp_package_path], @@ -533,6 +546,7 @@ def create_component_from_func( install_kfp_package: bool = True, kfp_package_path: Optional[str] = None, pip_trusted_hosts: Optional[List[str]] = None, + use_venv: bool = False, ) -> python_component.PythonComponent: """Implementation for the @component decorator. @@ -547,6 +561,7 @@ def create_component_from_func( packages_to_install=packages_to_install, pip_index_urls=pip_index_urls, pip_trusted_hosts=pip_trusted_hosts, + use_venv=use_venv, ) command = [] diff --git a/sdk/python/test_data/components/component_with_pip_install_in_venv.py b/sdk/python/test_data/components/component_with_pip_install_in_venv.py new file mode 100644 index 00000000000..c9f78fbde8f --- /dev/null +++ b/sdk/python/test_data/components/component_with_pip_install_in_venv.py @@ -0,0 +1,34 @@ +# Copyright 2024 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from kfp.dsl import component + + +@component( + pip_index_urls=["https://pypi.org/simple"], + packages_to_install=["yapf"], + use_venv=True, +) +def component_with_pip_install(): + import yapf + + print(dir(yapf)) + + +if __name__ == "__main__": + from kfp import compiler + + compiler.Compiler().compile( + pipeline_func=component_with_pip_install, + package_path=__file__.replace(".py", ".yaml"), + ) diff --git a/sdk/python/test_data/components/component_with_pip_install_in_venv.yaml b/sdk/python/test_data/components/component_with_pip_install_in_venv.yaml new file mode 100644 index 00000000000..135c9b469b2 --- /dev/null +++ b/sdk/python/test_data/components/component_with_pip_install_in_venv.yaml @@ -0,0 +1,55 @@ +# PIPELINE DEFINITION +# Name: component-with-pip-install +components: + comp-component-with-pip-install: + executorLabel: exec-component-with-pip-install +deploymentSpec: + executors: + exec-component-with-pip-install: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - component_with_pip_install + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ \nexport PIP_DISABLE_PIP_VERSION_CHECK=1\ntmp=$(mktemp -d)\npython3 -m\ + \ venv \"$tmp/venv\" --system-site-packages\n. 
\"$tmp/venv/bin/activate\"\ + \n python3 -m pip install --quiet --no-warn-script-location --index-url\ + \ https://pypi.org/simple --trusted-host https://pypi.org/simple 'kfp==2.9.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location --index-url\ + \ https://pypi.org/simple --trusted-host https://pypi.org/simple 'yapf'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef component_with_pip_install():\n import yapf\n\n print(dir(yapf))\n\ + \n" + image: python:3.9 +pipelineInfo: + name: component-with-pip-install +root: + dag: + tasks: + component-with-pip-install: + cachingOptions: + enableCache: true + componentRef: + name: comp-component-with-pip-install + taskInfo: + name: component-with-pip-install +schemaVersion: 2.1.0 +sdkVersion: kfp-2.9.0