From 2518c7032e7bca52800b44a2965a9c23ad3416c2 Mon Sep 17 00:00:00 2001 From: tvalentyn Date: Fri, 30 Aug 2024 10:44:10 -0700 Subject: [PATCH] Revert "docs: modernize py dependencies docs and example (#32345)" This reverts commit 28f2d47662a89b2e01d8f9046932201b8eba18e6. --- .../juliaset/{src => juliaset}/__init__.py | 0 .../juliaset/{src => }/juliaset/juliaset.py | 0 .../{src => }/juliaset/juliaset_test.py | 2 +- .../{src => }/juliaset/juliaset_test_it.py | 2 +- .../examples/complete/juliaset/pyproject.toml | 33 ------------ .../examples/complete/juliaset/setup.py | 26 ++++++--- .../juliaset/src/juliaset/__init__.py | 16 ------ .../sdks/python-pipeline-dependencies.md | 54 ++++++++----------- 8 files changed, 42 insertions(+), 91 deletions(-) rename sdks/python/apache_beam/examples/complete/juliaset/{src => juliaset}/__init__.py (100%) rename sdks/python/apache_beam/examples/complete/juliaset/{src => }/juliaset/juliaset.py (100%) rename sdks/python/apache_beam/examples/complete/juliaset/{src => }/juliaset/juliaset_test.py (97%) rename sdks/python/apache_beam/examples/complete/juliaset/{src => }/juliaset/juliaset_test_it.py (96%) delete mode 100644 sdks/python/apache_beam/examples/complete/juliaset/pyproject.toml delete mode 100644 sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/__init__.py diff --git a/sdks/python/apache_beam/examples/complete/juliaset/src/__init__.py b/sdks/python/apache_beam/examples/complete/juliaset/juliaset/__init__.py similarity index 100% rename from sdks/python/apache_beam/examples/complete/juliaset/src/__init__.py rename to sdks/python/apache_beam/examples/complete/juliaset/juliaset/__init__.py diff --git a/sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/juliaset.py b/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset.py similarity index 100% rename from sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/juliaset.py rename to 
sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset.py diff --git a/sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/juliaset_test.py b/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test.py similarity index 97% rename from sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/juliaset_test.py rename to sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test.py index b371c88d360d..6416831f4269 100644 --- a/sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/juliaset_test.py +++ b/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test.py @@ -27,7 +27,7 @@ import pytest -from apache_beam.examples.complete.juliaset.src.juliaset import juliaset +from apache_beam.examples.complete.juliaset.juliaset import juliaset from apache_beam.testing.util import open_shards diff --git a/sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/juliaset_test_it.py b/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test_it.py similarity index 96% rename from sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/juliaset_test_it.py rename to sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test_it.py index 0aac9f9cffb9..a2a3262a1fb6 100644 --- a/sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/juliaset_test_it.py +++ b/sdks/python/apache_beam/examples/complete/juliaset/juliaset/juliaset_test_it.py @@ -27,7 +27,7 @@ import pytest from hamcrest.core.core.allof import all_of -from apache_beam.examples.complete.juliaset.src.juliaset import juliaset +from apache_beam.examples.complete.juliaset.juliaset import juliaset from apache_beam.io.filesystems import FileSystems from apache_beam.runners.runner import PipelineState from apache_beam.testing.pipeline_verifiers import PipelineStateMatcher diff --git a/sdks/python/apache_beam/examples/complete/juliaset/pyproject.toml 
b/sdks/python/apache_beam/examples/complete/juliaset/pyproject.toml deleted file mode 100644 index 6c865974cbd2..000000000000 --- a/sdks/python/apache_beam/examples/complete/juliaset/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -[project] -name = "juliaset" -version = "0.0.1" -description = "Julia set workflow package." - -# Configure the required packages and scripts to install. -# Note that the Python Dataflow containers come with numpy already installed -# so this dependency will not trigger anything to be installed unless a version -# restriction is specified. -dependencies = [ - "numpy" -] - -[build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" diff --git a/sdks/python/apache_beam/examples/complete/juliaset/setup.py b/sdks/python/apache_beam/examples/complete/juliaset/setup.py index 649a5be7db75..c3a9fe043765 100644 --- a/sdks/python/apache_beam/examples/complete/juliaset/setup.py +++ b/sdks/python/apache_beam/examples/complete/juliaset/setup.py @@ -15,16 +15,14 @@ # limitations under the License. # -"""setup.py module for the pipeline package. +"""Setup.py module for the workflow's worker utilities. 
-In this example, the pipeline code is gathered in a package that can be built -as source distribution and installed on the workers. The package is defined -in the pyproject.toml file. You can use setup.py file for defining -configuration that needs to be determined programatically, for example, -custom commands to run when a package is installed. +All the workflow related code is gathered in a package that will be built as a +source distribution, staged in the staging area for the workflow being run and +then installed in the workers when they start running. -You can install this package into the workers at runtime by using -the --setup_file pipeline option. +This behavior is triggered by specifying the --setup_file command line option +when running the workflow for remote execution. """ # pytype: skip-file @@ -109,7 +107,19 @@ def run(self): self.RunCustomCommand(command) +# Configure the required packages and scripts to install. +# Note that the Python Dataflow containers come with numpy already installed +# so this dependency will not trigger anything to be installed unless a version +# restriction is specified. +REQUIRED_PACKAGES = [ + 'numpy', +] + setuptools.setup( + name='juliaset', + version='0.0.1', + description='Julia set workflow package.', + install_requires=REQUIRED_PACKAGES, packages=setuptools.find_packages(), cmdclass={ # Command class instantiated and run during pip install scenarios. diff --git a/sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/__init__.py b/sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/__init__.py deleted file mode 100644 index cce3acad34a4..000000000000 --- a/sdks/python/apache_beam/examples/complete/juliaset/src/juliaset/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md b/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md index 2a2a515ec09b..286e90a37053 100644 --- a/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md +++ b/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md @@ -95,53 +95,43 @@ If your pipeline uses packages that are not available publicly (e.g. packages th Often, your pipeline code spans multiple files. To run your project remotely, you must group these files as a Python package and specify the package when you run your pipeline. When the remote workers start, they will install your package. To group your files as a Python package and make it available remotely, perform the following steps: -1. Create a [pyproject.toml](https://packaging.python.org/en/latest/tutorials/packaging-projects/) file for your project. The following is a very basic `pyproject.toml` file. +1. Create a [setup.py](https://pythonhosted.org/an_example_pypi_project/setuptools.html) file for your project. The following is a very basic `setup.py` file. - [build-system] - requires = ["setuptools"] - build-backend = "setuptools.build_meta" - - [project] - name = "PACKAGE-NAME" - version = "PACKAGE-VERSION" - dependencies = [ - # List Python packages your pipeline depends on. - ] - -2. 
If your package requires if some programmatic configuration, or you need to use the `--setup_file` pipeline option, create a setup.py file for your project. - - # Note that the package can be completely defined by pyproject.toml. - # This file is optional. import setuptools - setuptools.setup() -3. Structure your project so that the root directory contains the `pyproject.toml`, the `setup.py` file, and a `src/` directory with the rest of the files. For example: + setuptools.setup( + name='PACKAGE-NAME', + version='PACKAGE-VERSION', + install_requires=[ + # List Python packages your pipeline depends on. + ], + packages=setuptools.find_packages(), + ) + +2. Structure your project so that the root directory contains the `setup.py` file, the main workflow file, and a directory with the rest of the files, for example: root_dir/ - pyproject.toml setup.py - src/ - main.py - my_package/ - my_pipeline_launcher.py - my_custom_dofns_and_transforms.py - other_utils_and_helpers.py + main.py + my_package/ + my_pipeline_launcher.py + my_custom_dofns_and_transforms.py + other_utils_and_helpers.py See [Juliaset](https://github.com/apache/beam/tree/master/sdks/python/apache_beam/examples/complete/juliaset) for an example that follows this project structure. -4. Install your package in the submission environment, for example by using the following command: +3. Install your package in the submission environment, for example by using the following command: pip install -e . -5. If you use a [custom container](#custom-containers), copy and install the package in the container as well. - -6. Run your pipeline with the following command-line option: +4. Run your pipeline with the following command-line option: --setup_file /path/to/setup.py -**Note:** It is not necessary to supply the `--requirements_file` [option](#pypi-dependencies) if the dependencies of your package are defined in the -`dependencies` field of the `pyproject.toml` file (see step 1). 
However unlike with the `--requirements_file` option, when you use the `--setup_file` option, Beam doesn't stage the dependent packages to the runner. -Only the pipeline package is staged. If they aren't already provided in the runtime environment, the package dependencies are installed from PyPI at runtime. +**Note:** It is not necessary to supply the `--requirements_file` [option](#pypi-dependencies) if the dependencies of your package are defined in the `install_requires` field of the `setup.py` file (see step 1). +However, unlike with the `--requirements_file` option, when you use the `--setup_file` option, Beam doesn't stage the dependent packages to the runner. +Only the pipeline package is staged. If the package dependencies aren't already provided in the runtime environment, +they are installed from PyPI at runtime. ## Non-Python Dependencies or PyPI Dependencies with Non-Python Dependencies {#nonpython}