From 179582804e3ec074552bc251f7f7f9b1526779d4 Mon Sep 17 00:00:00 2001 From: Hung-Ting Wen Date: Mon, 16 Dec 2019 13:05:37 -0800 Subject: [PATCH] rm kfctl_* (#4577) --- py/kubeflow/kubeflow/ci/kfctl_e2e_workflow.py | 671 ------------------ .../kubeflow/ci/kfctl_go_build_test.py | 33 - .../kubeflow/ci/kfctl_go_deploy_test.py | 42 -- .../kubeflow/ci/kfctl_go_test_utils.py | 311 -------- 4 files changed, 1057 deletions(-) delete mode 100644 py/kubeflow/kubeflow/ci/kfctl_e2e_workflow.py delete mode 100644 py/kubeflow/kubeflow/ci/kfctl_go_build_test.py delete mode 100644 py/kubeflow/kubeflow/ci/kfctl_go_deploy_test.py delete mode 100644 py/kubeflow/kubeflow/ci/kfctl_go_test_utils.py diff --git a/py/kubeflow/kubeflow/ci/kfctl_e2e_workflow.py b/py/kubeflow/kubeflow/ci/kfctl_e2e_workflow.py deleted file mode 100644 index 873fc22bad6..00000000000 --- a/py/kubeflow/kubeflow/ci/kfctl_e2e_workflow.py +++ /dev/null @@ -1,671 +0,0 @@ -""""Define the E2E workflow for kfctl. - -Rapid iteration. - -Here are some pointers for rapidly iterating on the workflow during development. - -1. You can use the e2e_tool.py to directly launch the workflow on a K8s cluster. - If you don't have CLI access to the kubeflow-ci cluster (most folks) then - you would need to setup your own test cluster. - -2. To avoid redeploying on successive runs set the following parameters - --app_name=name for kfapp - --delete_kubeflow=False - - Setting these parameters will cause the same KF deployment to be reused - across invocations. As a result successive runs won't have to redeploy KF. - -Example running with E2E tool - -export PYTHONPATH=${PYTHONPATH}:${KUBEFLOW_REPO}/py:${KUBEFLOW_TESTING_REPO}/py - -python -m kubeflow.testing.e2e_tool apply \ - kubeflow.kubeflow.ci.kfctl_e2e_workflow.create_workflow - --name=${USER}-kfctl-test-$(date +%Y%m%d-%H%M%S) \ - --namespace=kubeflow-test-infra \ - --test-endpoint=true \ - --kf-app-name=${KFAPPNAME} \ - --delete-kf=false - --open-in-chrome=true - -We set kf-app-name and delete-kf to false to allow reusing the deployment -across successive runs. - -To use code from a pull request set the prow envariables; e.g. - -export JOB_NAME="jlewi-test" -export JOB_TYPE="presubmit" -export BUILD_ID=1234 -export PROW_JOB_ID=1234 -export REPO_OWNER=kubeflow -export REPO_NAME=kubeflow -export PULL_NUMBER=4148 -""" - -import datetime -from kubeflow.testing import argo_build_util -from kubeflow.testing import util -import logging -import os -import uuid - -# The name of the NFS volume claim to use for test files. -NFS_VOLUME_CLAIM = "nfs-external" -# The name to use for the volume to use to contain test data -DATA_VOLUME = "kubeflow-test-volume" - -# This is the main dag with the entrypoint -E2E_DAG_NAME = "e2e" -EXIT_DAG_NAME = "exit-handler" - -# This is a sub dag containing the suite of tests to run against -# Kubeflow deployment -TESTS_DAG_NAME = "gke-tests" - -TEMPLATE_LABEL = "kfctl_e2e" - -DEFAULT_REPOS = [ - "kubeflow/kubeflow@HEAD", - "kubeflow/testing@HEAD", - "kubeflow/tf-operator@HEAD" -] - -class Builder: - def __init__(self, name=None, namespace=None, - config_path=("https://raw.githubusercontent.com/kubeflow" - "/manifests/master/kfdef/kfctl_gcp_iap.yaml"), - bucket=None, - test_endpoint=False, - use_basic_auth=False, - build_and_apply=False, - test_target_name=None, - kf_app_name=None, delete_kf=True, - extra_repos="", - **kwargs): - """Initialize a builder. - - Args: - name: Name for the workflow. - namespace: Namespace for the workflow. - config_path: Path to the KFDef spec file. 
- bucket: The bucket to upload artifacts to. If not set use default determined by prow_artifacts.py. - test_endpoint: Whether to test the endpoint is ready. - use_basic_auth: Whether to use basic_auth. - test_target_name: (Optional) Name to use as the test target to group - tests. - kf_app_name: (Optional) Name to use for the Kubeflow deployment. - If not set a unique name is assigned. Only set this if you want to - reuse an existing deployment across runs. - delete_kf: (Optional) Don't run the step to delete Kubeflow. Set to - true if you want to leave the deployment up for some reason. - """ - self.name = name - self.namespace = namespace - self.bucket = bucket - self.config_path = config_path - self.build_and_apply = build_and_apply - #**************************************************************************** - # Define directory locations - #**************************************************************************** - # mount_path is the directory where the volume to store the test data - # should be mounted. - self.mount_path = "/mnt/" + "test-data-volume" - # test_dir is the root directory for all data for a particular test run. - self.test_dir = self.mount_path + "/" + self.name - # output_dir is the directory to sync to GCS to contain the output for this - # job. - self.output_dir = self.test_dir + "/output" - - # We prefix the artifacts directory with junit because - # that's what spyglass/prow requires. This ensures multiple - # instances of a workflow triggered by the same prow job - # don't end up clobbering each other - self.artifacts_dir = self.output_dir + "/artifacts/junit_{0}".format(name) - - # source directory where all repos should be checked out - self.src_root_dir = self.test_dir + "/src" - # The directory containing the kubeflow/kfctl repo - self.src_dir = self.src_root_dir + "/kubeflow/kfctl" - self.kubeflow_dir = self.src_root_dir + "/kubeflow/kubeflow" - - # Directory in kubeflow/kfctl containing the pytest files. - self.kfctl_pytest_dir = os.path.join(self.src_dir, "testing/e2e") - - # Top level directories for python code - self.kubeflow_py = self.kubeflow_dir - - # The directory within the kubeflow_testing submodule containing - # py scripts to use. - self.kubeflow_testing_py = self.src_root_dir + "/kubeflow/testing/py" - - self.tf_operator_root = os.path.join(self.src_root_dir, - "kubeflow/tf-operator") - self.tf_operator_py = os.path.join(self.tf_operator_root, "py") - - self.go_path = self.test_dir - - # Name for the Kubeflow app. - # This needs to be unique for each test run because it is - # used to name GCP resources - # TODO(jlewi): Might be good to include pull number or build id in the name - # Not sure if being non-deterministic is a good idea. - # A better approach might be to hash the workflow name to generate a unique - # name dependent on the workflow name. We know there will be one workflow - # per cluster. - self.uuid = uuid.uuid4().hex[0:4] - - # Config name is the name of the config file. This is used to give junit - # files unique names. - self.config_name = os.path.splitext(os.path.basename(config_path))[0] - - # The class name to label junit files. - # We want to be able to group related tests in test grid. - # Test grid allows grouping by target which corresponds to the classname - # attribute in junit files. - # So we set an environment variable to the desired class name. - # The pytest modules can then look at this environment variable to - # explicitly override the classname. 
- # The classname should be unique for each run so it should take into - # account the different parameters - if test_target_name: - self.test_target_name = test_target_name - else: - self.test_target_name = self.config_name - - # app_name is the name of the Kubeflow deployment. - # This needs to be unique per run since we name GCP resources with it. - self.app_name = kf_app_name - if not self.app_name: - self.app_name = "kfctl-" + self.uuid - - self.delete_kf = delete_kf - - # GCP service accounts can only be max 30 characters. Service account names - # are generated by taking the app_name and appending suffixes like "user" - # and "admin" - if len(self.app_name) > 20: - raise ValueError(("app_name {0} is longer than 20 characters; this will" - "likely exceed GCP naming restrictions.").format( - self.app_name)) - # Directory for the KF app. - self.app_dir = os.path.join(self.test_dir, self.app_name) - self.use_basic_auth = use_basic_auth - - # The name space we create KF artifacts in; e.g. TFJob and notebooks. - # TODO(jlewi): These should no longer be running the system namespace but - # should move into the namespace associated with the default profile. - self.steps_namespace = "kubeflow" - self.test_endpoint = test_endpoint - - self.kfctl_path = os.path.join(self.src_dir, "bin/kfctl") - - # Fetch the main repo from Prow environment. - self.main_repo = argo_build_util.get_repo_from_prow_env() - - # extra_repos is a list of comma separated repo names with commits, - # in the format /@, - # e.g. "kubeflow/tf-operator@12345,kubeflow/manifests@23456". - # This will be used to override the default repo branches. - self.extra_repos = [] - if extra_repos: - self.extra_repos = extra_repos.split(',') - - def _build_workflow(self): - """Create the scaffolding for the Argo workflow""" - workflow = { - "apiVersion": "argoproj.io/v1alpha1", - "kind": "Workflow", - "metadata": { - "name": self.name, - "namespace": self.namespace, - "labels": argo_build_util.add_dicts([{ - "workflow": self.name, - "workflow_template": TEMPLATE_LABEL, - }, argo_build_util.get_prow_labels()]), - }, - "spec": { - "entrypoint": E2E_DAG_NAME, - # Have argo garbage collect old workflows otherwise we overload the API - # server. 
- "ttlSecondsAfterFinished": 7 * 24 * 60 * 60, - "volumes": [ - { - "name": "gcp-credentials", - "secret": { - "secretName": "kubeflow-testing-credentials", - }, - }, - { - "name": DATA_VOLUME, - "persistentVolumeClaim": { - "claimName": NFS_VOLUME_CLAIM, - }, - }, - ], - "onExit": EXIT_DAG_NAME, - "templates": [ - { - "dag": { - "tasks": [], - }, - "name": E2E_DAG_NAME, - }, - { - "dag":{ - "tasks": [], - }, - "name": TESTS_DAG_NAME, - - }, - { - "dag": { - "tasks": [], - }, - "name": EXIT_DAG_NAME, - } - ], - }, # spec - } # workflow - - return workflow - - def _build_task_template(self): - """Return a template for all the tasks""" - - task_template = {'activeDeadlineSeconds': 3000, - 'container': {'command': [], - 'env': [ - {"name": "GOOGLE_APPLICATION_CREDENTIALS", - "value": "/secret/gcp-credentials/key.json"}, - {"name": "TEST_TARGET_NAME", - "value": self.test_target_name}, - ], - 'image': 'gcr.io/kubeflow-ci/test-worker:latest', - 'imagePullPolicy': 'Always', - 'name': '', - 'resources': {'limits': {'cpu': '4', 'memory': '4Gi'}, - 'requests': {'cpu': '1', 'memory': '1536Mi'}}, - 'volumeMounts': [{'mountPath': '/mnt/test-data-volume', - 'name': 'kubeflow-test-volume'}, - {'mountPath': '/secret/gcp-credentials', 'name': 'gcp-credentials'}]}, - 'metadata': {'labels': { - 'workflow_template': TEMPLATE_LABEL}}, - 'outputs': {}} - - # Define common environment variables to be added to all steps - common_env = [ - {'name': 'PYTHONPATH', - 'value': ":".join([self.kubeflow_py, self.kubeflow_py + "/py", - self.kubeflow_testing_py, - self.tf_operator_py])}, - {'name': 'GOPATH', - 'value': self.go_path}, - {'name': 'KUBECONFIG', - 'value': os.path.join(self.test_dir, 'kfctl_test/.kube/kubeconfig')}, - ] - - task_template["container"]["env"].extend(common_env) - - task_template = argo_build_util.add_prow_env(task_template) - - return task_template - - def _build_step(self, name, workflow, dag_name, task_template, - command, dependences): - """Syntactic sugar to add a step to the workflow""" - - step = argo_build_util.deep_copy(task_template) - - step["name"] = name - step["container"]["command"] = command - - argo_build_util.add_task_to_dag(workflow, dag_name, step, dependences) - - # Return the newly created template; add_task_to_dag makes a copy of the template - # So we need to fetch it from the workflow spec. - for t in workflow["spec"]["templates"]: - if t["name"] == name: - return t - workflow["spec"]["templates"].append(new_template) - - return None - - def _build_tests_dag(self): - """Build the dag for the set of tests to run against a KF deployment.""" - - task_template = self._build_task_template() - - #*************************************************************************** - # Test TFJob - job_name = self.config_name.replace("_", "-") - step_name = "tfjob-test" - command = [ - "python", - "-m", - "kubeflow.tf_operator.simple_tfjob_tests", - "--app_dir=" + os.path.join(self.tf_operator_root, "test/workflows"), - "--tfjob_version=v1", - # Name is used for the test case name so it should be unique across - # all E2E tests. 
- "--params=name=smoke-tfjob-" + job_name + ",namespace=" + - self.steps_namespace, - "--artifacts_path=" + self.artifacts_dir, - # Skip GPU tests - "--skip_tests=test_simple_tfjob_gpu", - ] - - dependences = [] - tfjob_test = self._build_step(step_name, self.workflow, TESTS_DAG_NAME, task_template, - command, dependences) - - #************************************************************************* - # Test pytorch job - step_name = "pytorch-job-deploy" - command = [ "python", - "-m", - "testing.test_deploy", - "--project=kubeflow-ci", - "--namespace=" + self.steps_namespace, - "--test_dir=" + self.test_dir, - "--artifacts_dir=" + self.artifacts_dir, - "--deploy_name=pytorch-job", - "--workflow_name=" + self.name, - "deploy_pytorchjob", - # TODO(jlewi): Does the image need to be updated? - "--params=image=pytorch/pytorch:v0.2,num_workers=1" - ] - - - dependences = [] - pytorch_test = self._build_step(step_name, self.workflow, TESTS_DAG_NAME, task_template, - command, dependences) - - #*************************************************************************** - # Notebook test - - step_name = "notebook-test" - command = ["pytest", - "jupyter_test.py", - # I think -s mean stdout/stderr will print out to aid in debugging. - # Failures still appear to be captured and stored in the junit file. - "-s", - "--namespace=" + self.steps_namespace, - # Test timeout in seconds. - "--timeout=500", - "--junitxml=" + self.artifacts_dir + "/junit_jupyter-test.xml", - ] - - dependences = [] - notebook_test = self._build_step(step_name, self.workflow, TESTS_DAG_NAME, task_template, - command, dependences) - - notebook_test["container"]["workingDir"] = os.path.join( - self.kubeflow_dir, "kubeflow/jupyter/tests") - - #*************************************************************************** - # Profiles test - - step_name = "profiles-test" - command = ["pytest", - "profiles_test.py", - # I think -s mean stdout/stderr will print out to aid in debugging. - # Failures still appear to be captured and stored in the junit file. - "-s", - # Test timeout in seconds. 
- "--timeout=600", - "--junitxml=" + self.artifacts_dir + "/junit_profiles-test.xml", - ] - - dependences = [] - profiles_test = self._build_step(step_name, self.workflow, TESTS_DAG_NAME, task_template, - command, dependences) - - profiles_test["container"]["workingDir"] = os.path.join( - self.kubeflow_dir, "kubeflow/profiles/tests") - - def _build_exit_dag(self): - """Build the exit handler dag""" - task_template = self._build_task_template() - - #*********************************************************************** - # Delete Kubeflow - step_name = "kfctl-delete" - command = [ - "pytest", - "kfctl_delete_test.py", - "-s", - "--log-cli-level=info", - "--timeout=1000", - "--junitxml=" + self.artifacts_dir + "/junit_kfctl-go-delete-test.xml", - "--app_path=" + self.app_dir, - "--kfctl_path=" + self.kfctl_path, - ] - - if self.delete_kf: - kfctl_delete = self._build_step(step_name, self.workflow, EXIT_DAG_NAME, - task_template, - command, []) - - kfctl_delete["container"]["workingDir"] = self.kfctl_pytest_dir - - step_name = "copy-artifacts" - command = ["python", - "-m", - "kubeflow.testing.prow_artifacts", - "--artifacts_dir=" + - self.output_dir, - "copy_artifacts"] - - if self.bucket: - command = append("--bucket=" + self.bucket) - - dependences = [] - if self.delete_kf: - dependences = [kfctl_delete["name"]] - - copy_artifacts = self._build_step(step_name, self.workflow, EXIT_DAG_NAME, task_template, - command, dependences) - - - step_name = "test-dir-delete" - command = ["python", - "-m", - "testing.run_with_retry", - "--retries=5", - "--", - "rm", - "-rf", - self.test_dir,] - dependences = [copy_artifacts["name"]] - copy_artifacts = self._build_step(step_name, self.workflow, EXIT_DAG_NAME, task_template, - command, dependences) - - # We don't want to run from the directory we are trying to delete. - copy_artifacts["container"]["workingDir"] = "/" - - def build(self): - self.workflow = self._build_workflow() - task_template = self._build_task_template() - - #************************************************************************** - # Checkout - - # create the checkout step - - checkout = argo_build_util.deep_copy(task_template) - - # Construct the list of repos to checkout - list_of_repos = DEFAULT_REPOS - list_of_repos.append(self.main_repo) - list_of_repos.extend(self.extra_repos) - repos = util.combine_repos(list_of_repos) - repos_str = ','.join(['%s@%s' % (key, value) for (key, value) in repos.items()]) - - checkout["name"] = "checkout" - checkout["container"]["command"] = ["/usr/local/bin/checkout_repos.sh", - "--repos=" + repos_str, - "--src_dir=" + self.src_root_dir] - - argo_build_util.add_task_to_dag(self.workflow, E2E_DAG_NAME, checkout, []) - - # Change the workfing directory for all subsequent steps - task_template["container"]["workingDir"] = os.path.join( - self.kfctl_pytest_dir) - - #************************************************************************** - # Run build_kfctl and deploy kubeflow - - step_name = "kfctl-build-deploy" - command = [ - "pytest", - "kfctl_go_test.py", - # I think -s mean stdout/stderr will print out to aid in debugging. - # Failures still appear to be captured and stored in the junit file. - "-s", - "--config_path=" + self.config_path, - "--build_and_apply=" + str(self.build_and_apply), - # Increase the log level so that info level log statements show up. - # TODO(https://github.com/kubeflow/testing/issues/372): If we - # set a unique artifacts dir for each workflow with the proper - # prefix that should work. 
- "--log-cli-level=info", - "--junitxml=" + self.artifacts_dir + "/junit_kfctl-build-test" - + self.config_name + ".xml", - # TODO(jlewi) Test suite name needs to be unique based on parameters. - # - "-o", "junit_suite_name=test_kfctl_go_deploy_" + self.config_name, - "--app_path=" + self.app_dir, - "--kfctl_repo_path=" + self.src_dir, - ] - - dependences = [checkout["name"]] - build_kfctl = self._build_step(step_name, self.workflow, E2E_DAG_NAME, task_template, - command, dependences) - - #************************************************************************** - # Wait for Kubeflow to be ready - step_name = "kubeflow-is-ready" - command = [ - "pytest", - "kf_is_ready_test.py", - # I think -s mean stdout/stderr will print out to aid in debugging. - # Failures still appear to be captured and stored in the junit file. - "-s", - # TODO(jlewi): We should update kf_is_ready_test to take the config - # path and then based on the KfDef spec kf_is_ready_test should - # figure out what to do. - "--use_basic_auth={0}".format(self.use_basic_auth), - # TODO(jlewi): We should be using ISTIO always so can we stop - # setting this - "--use_istio=true", - # Increase the log level so that info level log statements show up. - "--log-cli-level=info", - "--junitxml=" + os.path.join(self.artifacts_dir, - "junit_kfctl-is-ready-test-" + - self.config_name + ".xml"), - # Test suite name needs to be unique based on parameters - "-o", "junit_suite_name=test_kf_is_ready_" + self.config_name, - "--app_path=" + self.app_dir, - ] - - dependences = [build_kfctl["name"]] - kf_is_ready = self._build_step(step_name, self.workflow, E2E_DAG_NAME, task_template, - command, dependences) - - - #************************************************************************** - # Wait for endpoint to be ready - if self.test_endpoint: - step_name = "endpoint-is-ready" - command = ["pytest", - "endpoint_ready_test.py", - # I think -s mean stdout/stderr will print out to aid in debugging. - # Failures still appear to be captured and stored in the junit file. - "-s", - # Increase the log level so that info level log statements show up. - "--log-cli-level=info", - # Test timeout in seconds. - "--timeout=1800", - "--junitxml=" + self.artifacts_dir + "/junit_endpoint-is-ready-test-" + self.config_name + ".xml", - # Test suite name needs to be unique based on parameters - "-o", "junit_suite_name=test_endpoint_is_ready_" + self.config_name, - "--app_path=" + self.app_dir, - "--app_name=" + self.app_name, - "--use_basic_auth={0}".format(self.use_basic_auth), - ] - - dependencies = [build_kfctl["name"]] - endpoint_ready = self._build_step(step_name, self.workflow, E2E_DAG_NAME, task_template, - command, dependencies) - #************************************************************************** - # Do kfctl apply again. This test will be skip if it's presubmit. - step_name = "kfctl-second-apply" - command = [ - "pytest", - "kfctl_second_apply.py", - # I think -s mean stdout/stderr will print out to aid in debugging. - # Failures still appear to be captured and stored in the junit file. 
- "-s", - "--log-cli-level=info", - "--junitxml=" + os.path.join(self.artifacts_dir, - "junit_kfctl-second-apply-test-" + - self.config_name + ".xml"), - # Test suite name needs to be unique based on parameters - "-o", "junit_suite_name=test_kfctl_second_apply_" + self.config_name, - "--app_path=" + self.app_dir, - ] - if self.test_endpoint: - dependences = [kf_is_ready["name"], endpoint_ready["name"]] - else: - dependences = [kf_is_ready["name"]] - kf_second_apply = self._build_step(step_name, self.workflow, E2E_DAG_NAME, task_template, - command, dependences) - - self._build_tests_dag() - - # Add a task to run the dag - dependencies = [kf_is_ready["name"]] - argo_build_util.add_task_only_to_dag(self.workflow, E2E_DAG_NAME, TESTS_DAG_NAME, - TESTS_DAG_NAME, - dependencies) - - #*************************************************************************** - # create_pr_symlink - #*************************************************************************** - # TODO(jlewi): run_e2e_workflow.py should probably create the PR symlink - step_name = "create-pr-symlink" - command = ["python", - "-m", - "kubeflow.testing.prow_artifacts", - "--artifacts_dir=" + self.output_dir, - "create_pr_symlink"] - - if self.bucket: - command.append(self.bucket) - - dependences = [checkout["name"]] - symlink = self._build_step(step_name, self.workflow, E2E_DAG_NAME, task_template, - command, dependences) - - self._build_exit_dag() - - - # Set the labels on all templates - self.workflow = argo_build_util.set_task_template_labels(self.workflow) - - return self.workflow - -# TODO(jlewi): This is an unnecessary layer of indirection around the builder -# We should allow py_func in prow_config to point to the builder and -# let e2e_tool take care of this. -def create_workflow(**kwargs): # pylint: disable=too-many-statements - """Create workflow returns an Argo workflow to test kfctl upgrades. - - Args: - name: Name to give to the workflow. This can also be used to name things - associated with the workflow. - """ - - builder = Builder(**kwargs) - - return builder.build() diff --git a/py/kubeflow/kubeflow/ci/kfctl_go_build_test.py b/py/kubeflow/kubeflow/ci/kfctl_go_build_test.py deleted file mode 100644 index e3d4f0656da..00000000000 --- a/py/kubeflow/kubeflow/ci/kfctl_go_build_test.py +++ /dev/null @@ -1,33 +0,0 @@ -import logging -import os - -import pytest -from kubeflow.testing import util -import kfctl_go_test_utils as kfctl_util - -def test_build_kfctl_go(record_xml_attribute): - """Test building of kfctl go. - - """ - util.set_pytest_junit(record_xml_attribute, "test_build_kfctl_go") - - # Need to activate account for scopes. 
- if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"): - util.run([ - "gcloud", "auth", "activate-service-account", - "--key-file=" + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] - ]) - - kfctl_path = kfctl_util.build_kfctl_go() - logging.info("kfctl go binary path %s", kfctl_path) - - -if __name__ == "__main__": - logging.basicConfig( - level=logging.INFO, - format=('%(levelname)s|%(asctime)s' - '|%(pathname)s|%(lineno)d| %(message)s'), - datefmt='%Y-%m-%dT%H:%M:%S', - ) - logging.getLogger().setLevel(logging.INFO) - pytest.main() diff --git a/py/kubeflow/kubeflow/ci/kfctl_go_deploy_test.py b/py/kubeflow/kubeflow/ci/kfctl_go_deploy_test.py deleted file mode 100644 index 5991574340f..00000000000 --- a/py/kubeflow/kubeflow/ci/kfctl_go_deploy_test.py +++ /dev/null @@ -1,42 +0,0 @@ -import logging -import os - -import pytest - -import kfctl_go_test_utils as kfctl_util -from kubeflow.testing import util - -def test_deploy_kfctl_go(record_xml_attribute, app_path, project, - use_basic_auth, use_istio, config_path): - """Test deploying Kubeflow. - - Args: - app_path: The path to the Kubeflow app. - project: The GCP project to use. - """ - util.set_pytest_junit(record_xml_attribute, "test_deploy_kfctl_go") - - # Need to activate account for scopes. - if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"): - util.run([ - "gcloud", "auth", "activate-service-account", - "--key-file=" + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] - ]) - - _, kfctl_path = kfctl_util.get_kfctl_go_build_dir_binary_path() - - kfctl_util.kfctl_deploy_kubeflow(app_path, project, use_basic_auth, - use_istio, config_path, kfctl_path) - - kfctl_util.verify_kubeconfig(app_path) - - -if __name__ == "__main__": - logging.basicConfig( - level=logging.INFO, - format=('%(levelname)s|%(asctime)s' - '|%(pathname)s|%(lineno)d| %(message)s'), - datefmt='%Y-%m-%dT%H:%M:%S', - ) - logging.getLogger().setLevel(logging.INFO) - pytest.main() diff --git a/py/kubeflow/kubeflow/ci/kfctl_go_test_utils.py b/py/kubeflow/kubeflow/ci/kfctl_go_test_utils.py deleted file mode 100644 index f285788cb1b..00000000000 --- a/py/kubeflow/kubeflow/ci/kfctl_go_test_utils.py +++ /dev/null @@ -1,311 +0,0 @@ -"""Common reusable steps for kfctl go testing.""" -import datetime -import json -import logging -import os -import tempfile -import urllib -import uuid -import re - -import requests -import yaml -from kubeflow.testing import util -from retrying import retry - -# retry 4 times, waiting 3 minutes between retries -@retry(stop_max_attempt_number=4, wait_fixed=180000) -def run_with_retries(*args, **kwargs): - util.run(*args, **kwargs) - -def build_kfctl_go(kfctl_repo_path): - """build the kfctl go binary and return the path for the same. - - Args: - kfctl_repo_path (str): Path to kfctl repo. - - Return: - kfctl_path (str): Path where kfctl go binary has been built. - will be Kubeflow/kubeflow/bootstrap/bin/kfctl - """ - kfctl_path = os.path.join(kfctl_repo_path, "bin", "kfctl") - # We need to use retry builds because when building in the test cluster - # we see intermittent failures pulling dependencies - run_with_retries(["make", "build-kfctl"], cwd=kfctl_repo_path) - return kfctl_path - -def get_or_create_app_path_and_parent_dir(app_path): - """Get a valid app_path and parent dir. Create if they are not existing. 
- """ - if not app_path: - logging.info("--app_path not specified") - stamp = datetime.datetime.now().strftime("%H%M") - parent_dir = tempfile.gettempdir() - app_path = os.path.join( - parent_dir, "kfctl-{0}-{1}".format(stamp, - uuid.uuid4().hex[0:4])) - else: - parent_dir = os.path.dirname(app_path) - - if not os.path.exists(parent_dir): - os.makedirs(parent_dir) - if not os.path.exists(app_path): - os.makedirs(app_path) - - return app_path, parent_dir - -def load_config(config_path): - """Load specified KFDef. - - Args: - config_path: Path to a YAML file containing a KFDef object. - Can be a local path or a URI like - https://raw.githubusercontent.com/kubeflow/manifests/master/kfdef/kfctl_gcp_iap.yaml - Returns: - config_spec: KfDef spec - """ - url_for_spec = urllib.parse.urlparse(config_path) - - if url_for_spec.scheme in ["http", "https"]: - data = requests.get(config_path) - return yaml.load(data.content) - else: - with open(config_path, 'r') as f: - config_spec = yaml.load(f) - return config_spec - -def set_env_init_args(config_spec): - gcp_plugin = {} - for plugin in config_spec.get("spec", {}).get("plugins", []): - if plugin.get("kind", "") == "KfGcpPlugin": - gcp_plugin = plugin - break - use_basic_auth = gcp_plugin.get("spec", {}).get("useBasicAuth", False) - logging.info("use_basic_auth=%s", use_basic_auth) - # Is it really needed? - init_args = [] - # Set ENV for basic auth username/password. - if use_basic_auth: - # Don't log the password. - # logging.info("Setting environment variables KUBEFLOW_USERNAME and KUBEFLOW_PASSWORD") - os.environ["KUBEFLOW_USERNAME"] = "kf-test-user" - os.environ["KUBEFLOW_PASSWORD"] = str(uuid.uuid4().hex) - init_args = ["--use_basic_auth"] - else: - # Owned by project kubeflow-ci-deployment. - logging.info("Setting environment variables CLIENT_SECRET and CLIENT_ID") - os.environ["CLIENT_SECRET"] = "CJ4qVPLTi0j0GJMkONj7Quwt" - os.environ["CLIENT_ID"] = ( - "29647740582-7meo6c7a9a76jvg54j0g2lv8lrsb4l8g" - ".apps.googleusercontent.com") - # Always use ISTIO. - # TODO(gabrielwen): We should be able to remove this flag. - init_args.append("--use_istio") - -def write_basic_auth_login(filename): - """Read basic auth login from ENV and write to the filename given. If username/password - cannot be found in ENV, this function will silently return. - - Args: - filename: The filename (directory/file name) the login is writing to. - """ - username = os.environ.get("KUBEFLOW_USERNAME", "") - password = os.environ.get("KUBEFLOW_PASSWORD", "") - - if not username or not password: - return - - with open(filename, "w") as f: - login = { - "username": username, - "password": password, - } - json.dump(login, f) - -def filter_spartakus(spec): - """Filter our Spartakus from KfDef spec. - - Args: - spec: KfDef spec - - Returns: - spec: Filtered KfDef spec - """ - for i, app in enumerate(spec["applications"]): - if app["name"] == "spartakus": - spec["applications"].pop(i) - break - return spec - -def get_config_spec(config_path, project, email, zone, app_path): - """Generate KfDef spec. - - Args: - config_path: Path to a YAML file containing a KFDef object. - Can be a local path or a URI like - https://raw.githubusercontent.com/kubeflow/manifests/master/kfdef/kfctl_gcp_iap.yaml - project: The GCP project to use. - email: a valid email of the GCP account - zone: a valid GCP zone for the cluster. - app_path: The path to the Kubeflow app. 
- Returns: - config_spec: Updated KfDef spec - """ - # TODO(https://github.com/kubeflow/kubeflow/issues/2831): Once kfctl - # supports loading version from a URI we should use that so that we - # pull the configs from the repo we checked out. - config_spec = load_config(config_path) - apiVersion = config_spec["apiVersion"].strip().split("/") - if len(apiVersion) != 2: - raise RuntimeError("Invalid apiVersion: " + config_spec["apiVersion"].strip()) - if apiVersion[-1] == "v1alpha1": - config_spec["spec"]["project"] = project - config_spec["spec"]["email"] = email - config_spec["spec"]["zone"] = zone - elif apiVersion[-1] == "v1beta1": - for plugin in config_spec["spec"].get("plugins", []): - if plugin.get("kind", "") == "KfGcpPlugin": - plugin["spec"]["project"] = project - plugin["spec"]["email"] = email - plugin["spec"]["zone"] = zone - break - else: - raise RuntimeError("Unknown version: " + apiVersion[-1]) - config_spec["spec"] = filter_spartakus(config_spec["spec"]) - - # Set KfDef name to be unique - # TODO(swiftdiaries): this is already being set at app_name - # we need to reuse that - regex = re.compile('[a-z](?:[-a-z0-9]{0,61}[a-z0-9])?') - kfdef_name = regex.findall(app_path)[-1] - config_spec["metadata"]["name"] = kfdef_name - - repos = config_spec["spec"]["repos"] - manifests_repo_name = "manifests" - if os.getenv("REPO_NAME") == manifests_repo_name: - # kfctl_go_test.py was triggered on presubmit from the kubeflow/manifests - # repository. In this case we want to use the specified PR of the - # kubeflow/manifests repository; so we need to change the repo specification - # in the KFDef spec. - # TODO(jlewi): We should also point to a specific commit when triggering - # postsubmits from the kubeflow/manifests repo - for repo in repos: - if repo["name"] != manifests_repo_name: - continue - - version = None - - if os.getenv("PULL_PULL_SHA"): - # Presubmit - version = os.getenv("PULL_PULL_SHA") - - # See https://github.com/kubernetes/test-infra/blob/45246b09ed105698aa8fb928b7736d14480def29/prow/jobs.md#job-environment-variables # pylint: disable=line-too-long - elif os.getenv("PULL_BASE_SHA"): - version = os.getenv("PULL_BASE_SHA") - - if version: - repo["uri"] = ("https://github.com/kubeflow/manifests/archive/" - "{0}.tar.gz").format(version) - logging.info("Overwriting the URI") - else: - # Its a periodic job so use whatever value is set in the KFDef - logging.info("Not overwriting manifests version") - logging.info(str(config_spec)) - return config_spec - -def kfctl_deploy_kubeflow(app_path, project, use_basic_auth, use_istio, config_path, kfctl_path, build_and_apply): - """Deploy kubeflow. - - Args: - app_path: The path to the Kubeflow app. - project: The GCP project to use. - use_basic_auth: Whether to use basic_auth. - use_istio: Whether to use Istio or not - config_path: Path to the KFDef spec file. 
- kfctl_path: Path to the kfctl go binary - build_and_apply: whether to build and apply or apply - Returns: - app_path: Path where Kubeflow is installed - """ - # build_and_apply is a boolean used for testing both the new semantics - # test case 1: build_and_apply - # kfctl build -f - # kfctl apply - # test case 2: apply - # kfctl apply -f - - if not os.path.exists(kfctl_path): - msg = "kfctl Go binary not found: {path}".format(path=kfctl_path) - logging.error(msg) - raise RuntimeError(msg) - - app_path, parent_dir = get_or_create_app_path_and_parent_dir(app_path) - - logging.info("Project: %s", project) - logging.info("app path %s", app_path) - logging.info("kfctl path %s", kfctl_path) - # TODO(nrchakradhar): Probably move all the environ sets to set_env_init_args - zone = 'us-central1-a' - if not zone: - raise ValueError("Could not get zone being used") - - # We need to specify a valid email because - # 1. We need to create appropriate RBAC rules to allow the current user - # to create the required K8s resources. - # 2. Setting the IAM policy will fail if the email is invalid. - email = util.run(["gcloud", "config", "get-value", "account"]) - - if not email: - raise ValueError("Could not determine GCP account being used.") - if not project: - raise ValueError("Could not get project being used") - - config_spec = get_config_spec(config_path, project, email, zone, app_path) - with open(os.path.join(app_path, "tmp.yaml"), "w") as f: - yaml.dump(config_spec, f) - - # Set ENV for credentials IAP/basic auth needs. - set_env_init_args(config_spec) - - # Write basic auth login username/password to a file for later tests. - # If the ENVs are not set, this function call will be noop. - write_basic_auth_login(os.path.join(app_path, "login.json")) - - # build_and_apply - logging.info("running kfctl with build and apply: %s \n", build_and_apply) - - logging.info("switching working directory to: %s \n", app_path) - os.chdir(app_path) - - # Do not run with retries since it masks errors - logging.info("Running kfctl with config:\n%s", yaml.safe_dump(config_spec)) - if build_and_apply: - build_and_apply_kubeflow(kfctl_path, app_path) - else: - apply_kubeflow(kfctl_path, app_path) - return app_path - -def apply_kubeflow(kfctl_path, app_path): - util.run([kfctl_path, "apply", "-V", "-f=" + os.path.join(app_path, "tmp.yaml")], cwd=app_path) - return app_path - -def build_and_apply_kubeflow(kfctl_path, app_path): - util.run([kfctl_path, "build", "-V", "-f=" + os.path.join(app_path, "tmp.yaml")], cwd=app_path) - util.run([kfctl_path, "apply", "-V", "-f=" + os.path.join(app_path, "tmp.yaml")], cwd=app_path) - return app_path - -def verify_kubeconfig(app_path): - """Verify kubeconfig. - - Args: - app_path: KfDef spec path - """ - name = os.path.basename(app_path) - context = util.run(["kubectl", "config", "current-context"]).strip() - if name == context: - logging.info("KUBECONFIG current context name matches app name: %s", name) - else: - msg = "KUBECONFIG not having expected context: {expected} v.s. {actual}".format( - expected=name, actual=context) - logging.error(msg) - raise RuntimeError(msg)
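
For reference, the kubeconfig check in the deleted verify_kubeconfig reduces to comparing the basename of the Kubeflow app directory with the active kubectl context. Below is a minimal standalone sketch of that same check, assuming kubectl is on the PATH and without the kubeflow.testing helpers; the helper name check_kubeconfig_context and the path in the usage line are illustrative only.

import logging
import os
import subprocess

def check_kubeconfig_context(app_path):
    """Raise if the active kubectl context does not match the app directory name."""
    expected = os.path.basename(app_path)
    # "kubectl config current-context" prints the name of the context currently in use.
    actual = subprocess.run(
        ["kubectl", "config", "current-context"],
        check=True, capture_output=True, text=True).stdout.strip()
    if expected != actual:
        raise RuntimeError(
            "KUBECONFIG not having expected context: {0} v.s. {1}".format(expected, actual))
    logging.info("KUBECONFIG current context matches app name: %s", expected)

# Hypothetical app path for illustration; in the deleted tests app_path was derived
# from the per-run test directory.
check_kubeconfig_context("/mnt/test-data-volume/some-test-run/kfctl-abcd")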