From 0eac9a848b2b6c358a5f2b1111df28a1c1823fcf Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 19 Sep 2023 21:01:52 -0400 Subject: [PATCH] [DATALAD RUNCMD] run codespell throughout but ignore fail === Do not change lines below === { "chain": [], "cmd": "codespell -w || :", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." } ^^^ Do not change lines above ^^^ --- CHANGELOG/CHANGELOG-v0.18.2.md | 4 ++-- CHANGELOG/CHANGELOG-v0.7.0.md | 2 +- CHANGELOG/CHANGELOG-v1.2.0.md | 2 +- CHANGELOG/CHANGELOG-v1.3.0.md | 2 +- CHANGELOG/CHANGELOG-v1.4.0.md | 4 ++-- .../flyte/golang_test_targets/go-gen.sh | 2 +- charts/flyte-binary/values.yaml | 2 +- charts/flyte-core/README.md | 2 +- charts/flyte-core/README.md.gotmpl | 2 +- charts/flyte-core/values-eks-override.yaml | 2 +- charts/flyte-deps/README.md | 2 +- charts/flyte-deps/README.md.gotmpl | 2 +- charts/flyte/README.md | 4 ++-- charts/flyte/README.md.gotmpl | 4 ++-- .../flyte/docker_build/docker_build.sh | 2 +- .../flyte/github_workflows/Readme.rst | 4 ++-- .../flyte/golang_test_targets/go-gen.sh | 2 +- .../pkg/rpc/datacatalogservice/service.go | 2 +- .../flyte/docker_build/docker_build.sh | 2 +- .../flyte/github_workflows/Readme.rst | 4 ++-- .../flyte/golang_test_targets/go-gen.sh | 2 +- flyteadmin/pkg/common/filters.go | 2 +- flyteadmin/pkg/common/flyte_url.go | 2 +- .../manager/impl/validation/task_validator.go | 2 +- flyteadmin/scheduler/core/gocron_scheduler.go | 6 ++--- flyteadmin/scheduler/doc.go | 2 +- .../schedule_entities_snapshot_repo.go | 2 +- flyteadmin/scheduler/schedule_executor.go | 2 +- .../snapshoter/versioned_snapshot.go | 6 ++--- flyteadmin/script/integration/launch.sh | 2 +- .../flyte/docker_build/docker_build.sh | 2 +- .../flyte/github_workflows/Readme.rst | 4 ++-- .../flyte/golang_test_targets/go-gen.sh | 2 +- flyteidl/Makefile | 2 +- .../flyte/golang_test_targets/go-gen.sh | 2 +- flyteidl/clients/go/admin/client.go | 2 +- flyteidl/clients/go/admin/client_builder.go | 2 +- flyteidl/clients/go/admin/client_test.go | 22 +++++++++---------- .../base_token_orchestrator_test.go | 4 ++-- flyteidl/clients/go/coreutils/literals.go | 2 +- flyteidl/generate_protos.sh | 2 +- flyteidl/protos/docs/admin/admin.rst | 8 +++---- .../protos/docs/datacatalog/datacatalog.rst | 2 +- flyteidl/protos/docs/service/index.rst | 2 +- .../protos/flyteidl/admin/launch_plan.proto | 4 ++-- flyteidl/protos/flyteidl/admin/signal.proto | 2 +- flyteidl/protos/flyteidl/admin/workflow.proto | 2 +- .../flyteidl/datacatalog/datacatalog.proto | 2 +- .../protos/flyteidl/service/dataproxy.proto | 2 +- .../flyte/docker_build/docker_build.sh | 2 +- .../flyte/golang_test_targets/go-gen.sh | 2 +- .../pluginmachinery/flytek8s/config/config.go | 4 ++-- .../go/tasks/pluginmachinery/registry.go | 4 ++-- .../tasks/plugins/array/arraystatus/status.go | 2 +- .../go/tasks/plugins/array/k8s/subtask.go | 2 +- .../plugins/k8s/kfoperators/mpi/mpi_test.go | 2 +- .../k8s/kfoperators/pytorch/pytorch_test.go | 2 +- .../kfoperators/tensorflow/tensorflow_test.go | 2 +- flyteplugins/tests/end_to_end.go | 2 +- .../flyte/docker_build/docker_build.sh | 2 +- .../flyte/golang_test_targets/go-gen.sh | 2 +- flytepropeller/cmd/manager/cmd/root.go | 2 +- flytepropeller/manager/doc.go | 4 ++-- .../manager/shardstrategy/environment.go | 2 +- .../v1alpha1/execution_config.go | 2 +- .../pkg/apis/flyteworkflow/v1alpha1/nodes.go | 4 ++-- .../pkg/compiler/errors/compiler_errors.go | 2 +- .../pkg/compiler/transformers/k8s/utils.go | 2 +- .../pkg/compiler/validators/utils_test.go | 2 +- flytepropeller/pkg/controller/handler.go | 2 +- .../pkg/controller/nodes/array/handler.go | 6 ++--- .../controller/nodes/array/handler_test.go | 6 ++--- flytepropeller/pkg/controller/nodes/cache.go | 4 ++-- .../pkg/controller/nodes/cache_test.go | 8 +++---- .../nodes/catalog/datacatalog/datacatalog.go | 4 ++-- .../nodes/catalog/datacatalog/transformer.go | 2 +- .../pkg/controller/nodes/executor.go | 4 ++-- .../pkg/controller/nodes/executor_test.go | 2 +- .../pkg/controller/nodes/node_exec_context.go | 2 +- .../nodes/subworkflow/launchplan/errors.go | 2 +- .../pkg/controller/nodes/task/handler.go | 2 +- .../pkg/controller/nodes/transformers.go | 2 +- .../pkg/controller/workflow/executor.go | 2 +- .../workflowstore/resource_version_caching.go | 2 +- .../pkg/webhook/vault_secret_manager_test.go | 2 +- flytepropeller/script/fold-logs.py | 2 +- flytestdlib/Makefile | 2 +- .../flyte/golang_test_targets/go-gen.sh | 2 +- flytestdlib/cli/pflags/readme.rst | 2 +- flytestdlib/config/config_cmd.go | 2 +- flytestdlib/contextutils/context.go | 2 +- rfc/README.md | 8 +++---- rfc/core language/1461-cache-serialize-api.md | 18 +++++++-------- rfc/core language/sum-types-2.md | 2 +- rfc/core language/sum-types.md | 2 +- ...ntime-workflow-control-using-signalling.md | 2 +- rfc/system/0007-community-groups.md | 2 +- rfc/system/1476-task-resources.md | 4 ++-- .../1483-flytepropeller-horizontal-scaling.md | 6 ++--- .../2633-eviction-of-cached-task-outputs.md | 8 +++---- rfc/system/2995-performance-benchmarking.md | 6 ++--- rfc/system/3346-array-node.md | 2 +- rfc/system/3553-config-override.md | 6 ++--- rfc/system/3749-sane-overridable-defaults.md | 6 ++--- rfc/system/RFC-0000-console-ui-upgrade.md | 2 +- rsts/community/index.rst | 2 +- rsts/community/troubleshoot.rst | 2 +- rsts/concepts/console.rst | 2 +- rsts/concepts/workflow_lifecycle.rst | 14 ++++++------ rsts/deployment/configuration/auth_setup.rst | 4 ++-- rsts/deployment/configuration/general.rst | 2 +- rsts/deployment/configuration/performance.rst | 20 ++++++++--------- rsts/index.rst | 2 +- script/generate_helm.sh | 2 +- script/generate_kustomize.sh | 2 +- 115 files changed, 194 insertions(+), 194 deletions(-) diff --git a/CHANGELOG/CHANGELOG-v0.18.2.md b/CHANGELOG/CHANGELOG-v0.18.2.md index d9818406c9a..ec3f1213eb7 100644 --- a/CHANGELOG/CHANGELOG-v0.18.2.md +++ b/CHANGELOG/CHANGELOG-v0.18.2.md @@ -29,7 +29,7 @@ See the flytekit [0.25.0 release notes](https://github.com/flyteorg/flytekit/rel In addition to component-specific versions released from each of the flyte repositories (e.g. flytepropeller:v0.16.5), new images will be re-tagged and pushed that match the flyte release version (e.g. the upcoming flytepropeller-release:v0.18.2). This makes it easier to make sure all your deployments are on the same version to ensure best compatibility. * Helm changes * [flyte-core](https://artifacthub.io/packages/helm/flyte/flyte-core) helm chart has reached release preview and can be leveraged to install your cloud(AWS/GCP) deployments of flyte. - * Going forward flyte-core will install flyte native scheduler, For AWS backword compatibility you need to define `workflow_schedule.type` to `aws`. (https://github.com/flyteorg/flyte/pull/1896) + * Going forward flyte-core will install flyte native scheduler, For AWS backward compatibility you need to define `workflow_schedule.type` to `aws`. (https://github.com/flyteorg/flyte/pull/1896) * [flyte](https://artifacthub.io/packages/helm/flyte/flyte) helm chart has been refactored to depend on flyte-core helm chart and install additional dependencies to continue to provide a sandboxed installation of flyte. **Migration Notes** @@ -59,4 +59,4 @@ See the flytekit [0.25.0 release notes](https://github.com/flyteorg/flytekit/rel countour: ... ``` - * Alternatively, if you do not have any dependency on external flyte depdencies, you can keep your ``myvalues.yaml`` and switch to using ``flyte-core`` helm chart directly with no changes. + * Alternatively, if you do not have any dependency on external flyte dependencies, you can keep your ``myvalues.yaml`` and switch to using ``flyte-core`` helm chart directly with no changes. diff --git a/CHANGELOG/CHANGELOG-v0.7.0.md b/CHANGELOG/CHANGELOG-v0.7.0.md index 24b3797a52a..270f1f2f5de 100644 --- a/CHANGELOG/CHANGELOG-v0.7.0.md +++ b/CHANGELOG/CHANGELOG-v0.7.0.md @@ -9,7 +9,7 @@ - Large steps towards intracloud workflow portability ## Console - - Ability to track lineage and caching information directly in the UI. On a cache hit - possible to jump to the orginating execution. + - Ability to track lineage and caching information directly in the UI. On a cache hit - possible to jump to the originating execution. - Ability to clone an execution - bug fixes diff --git a/CHANGELOG/CHANGELOG-v1.2.0.md b/CHANGELOG/CHANGELOG-v1.2.0.md index df8aad9aabe..4dbd2f3a2a5 100644 --- a/CHANGELOG/CHANGELOG-v1.2.0.md +++ b/CHANGELOG/CHANGELOG-v1.2.0.md @@ -4,7 +4,7 @@ - Support for Ray (https://github.com/flyteorg/flyte/issues/2641) - Also see the [blog post](https://blog.flyte.org/ray-and-flyte). - Execution names can be longer now, up to 63 characters (https://github.com/flyteorg/flyteadmin/pull/466) - Offloading FlyteWorkflow CRD static workflow spec (https://github.com/flyteorg/flyte/issues/2705) -- Enabled FlytePropeller subqueue - this means that everytime a pod is updated in the workflow it reevals for faster downstream scheduling +- Enabled FlytePropeller subqueue - this means that every time a pod is updated in the workflow it reevals for faster downstream scheduling - Add container configuration to default pod template (https://github.com/flyteorg/flyte/issues/2703) - Fixed issues with blobstore writes - GCS had duplicate writes and subworkflow inputs were rewritten on every evaluation, this meant slower evaluations - Support external deletion of non-terminal map task subtasks (as a result of https://github.com/flyteorg/flyte/issues/2701) diff --git a/CHANGELOG/CHANGELOG-v1.3.0.md b/CHANGELOG/CHANGELOG-v1.3.0.md index 1110c965377..c710af9334a 100644 --- a/CHANGELOG/CHANGELOG-v1.3.0.md +++ b/CHANGELOG/CHANGELOG-v1.3.0.md @@ -95,7 +95,7 @@ r.set_signal("signal-name", "execidabc123", True) ``` ### Overwritten Cached Values on Execution -Users can now configure workflow execution to overwrite the cache. Each task in the workflow execution, regardless of previous cache status, will execute and write cached values - overwritting previous values if necessary. This allows previously corrupted cache values to be corrected without the tedious process of incrementing the `cache_version` and re-registering Flyte workflows / tasks. +Users can now configure workflow execution to overwrite the cache. Each task in the workflow execution, regardless of previous cache status, will execute and write cached values - overwriting previous values if necessary. This allows previously corrupted cache values to be corrected without the tedious process of incrementing the `cache_version` and re-registering Flyte workflows / tasks. ### Support for Dask diff --git a/CHANGELOG/CHANGELOG-v1.4.0.md b/CHANGELOG/CHANGELOG-v1.4.0.md index d5f5a51e659..7608f44bfeb 100644 --- a/CHANGELOG/CHANGELOG-v1.4.0.md +++ b/CHANGELOG/CHANGELOG-v1.4.0.md @@ -1,7 +1,7 @@ # Flyte 1.4 release The main features of the 1.4 release are: -- Suport for `PodTemplate` at the task-level +- Support for `PodTemplate` at the task-level - Revamped auth system in flytekit As python 3.7 [reached](https://endoflife.date/python) EOL support in December of 2022, we dropped support for that version on this release. @@ -58,7 +58,7 @@ In https://github.com/flyteorg/flytekit/pull/1458 we introduced a new OAuth2 han ## New sandbox features In this new release `flytectl demo` brings the following new features: - Support for specifying extra configuration for Flyte -- Support for specifying extra cluster resource templates for boostrapping new namespaces +- Support for specifying extra cluster resource templates for bootstrapping new namespaces - Sandbox state (DB, buckets) is now persistent across restarts and upgrades ## Flyteconsole diff --git a/boilerplate/flyte/golang_test_targets/go-gen.sh b/boilerplate/flyte/golang_test_targets/go-gen.sh index 54bd6af61b1..5ac17fa40ae 100755 --- a/boilerplate/flyte/golang_test_targets/go-gen.sh +++ b/boilerplate/flyte/golang_test_targets/go-gen.sh @@ -9,7 +9,7 @@ go generate ./... if [ -n "$DELTA_CHECK" ]; then DIRTY=$(git status --porcelain) if [ -n "$DIRTY" ]; then - echo "FAILED: Go code updated without commiting generated code." + echo "FAILED: Go code updated without committing generated code." echo "Ensure make generate has run and all changes are committed." DIFF=$(git diff) echo "diff detected: $DIFF" diff --git a/charts/flyte-binary/values.yaml b/charts/flyte-binary/values.yaml index 932ca9cca81..f7e99f3c484 100644 --- a/charts/flyte-binary/values.yaml +++ b/charts/flyte-binary/values.yaml @@ -116,7 +116,7 @@ configuration: clientSecret: "" # internal Configuration for internal authentication # The settings for internal still need to be defined if you wish to use an external auth server - # These credentials are used during communication beteween the FlyteAdmin and Propeller microservices + # These credentials are used during communication between the FlyteAdmin and Propeller microservices internal: # clientId Client ID for internal authentication - set to flytepropeller or external auth server clientId: flytepropeller diff --git a/charts/flyte-core/README.md b/charts/flyte-core/README.md index 7150f049ce4..71fbfd8896d 100644 --- a/charts/flyte-core/README.md +++ b/charts/flyte-core/README.md @@ -48,7 +48,7 @@ helm install gateway bitnami/contour -n flyte ### CONFIGURATION NOTES: - The docker images, their tags and other default parameters are configured in `values.yaml` file. -- Each Flyte installation type should have separate `values-*.yaml` file: for sandbox, EKS and etc. The configuration in `values.yaml` and the choosen config `values-*.yaml` are merged when generating the deployment manifest. +- Each Flyte installation type should have separate `values-*.yaml` file: for sandbox, EKS and etc. The configuration in `values.yaml` and the chosen config `values-*.yaml` are merged when generating the deployment manifest. - The configuration in `values-sandbox.yaml` is ready for installation in minikube. But `values-eks.yaml` should be edited before installation: s3 bucket, RDS hosts, iam roles, secrets and etc need to be modified. ## Values diff --git a/charts/flyte-core/README.md.gotmpl b/charts/flyte-core/README.md.gotmpl index 4976a6a39c2..aa20446da4d 100644 --- a/charts/flyte-core/README.md.gotmpl +++ b/charts/flyte-core/README.md.gotmpl @@ -52,7 +52,7 @@ helm install gateway bitnami/contour -n flyte ### CONFIGURATION NOTES: - The docker images, their tags and other default parameters are configured in `values.yaml` file. -- Each Flyte installation type should have separate `values-*.yaml` file: for sandbox, EKS and etc. The configuration in `values.yaml` and the choosen config `values-*.yaml` are merged when generating the deployment manifest. +- Each Flyte installation type should have separate `values-*.yaml` file: for sandbox, EKS and etc. The configuration in `values.yaml` and the chosen config `values-*.yaml` are merged when generating the deployment manifest. - The configuration in `values-sandbox.yaml` is ready for installation in minikube. But `values-eks.yaml` should be edited before installation: s3 bucket, RDS hosts, iam roles, secrets and etc need to be modified. {{ template "chart.valuesSection" . }} diff --git a/charts/flyte-core/values-eks-override.yaml b/charts/flyte-core/values-eks-override.yaml index 76f371c6396..77aa7c5a584 100644 --- a/charts/flyte-core/values-eks-override.yaml +++ b/charts/flyte-core/values-eks-override.yaml @@ -1,6 +1,6 @@ # -- # Flyte uses a cloud hosted Cron scheduler to run workflows on a schedule. The following module is optional. Without, # this module, you will not have scheduled launchplans/workflows. -# we support native scheduler and this is for backward compatability to older scheduler only +# we support native scheduler and this is for backward compatibility to older scheduler only workflow_scheduler: enabled: true type: aws diff --git a/charts/flyte-deps/README.md b/charts/flyte-deps/README.md index 0cbf338fed1..6a5eb13e4f6 100644 --- a/charts/flyte-deps/README.md +++ b/charts/flyte-deps/README.md @@ -44,7 +44,7 @@ helm upgrade -f values.yaml flyte . ### CONFIGURATION NOTES: - The docker images, their tags and other default parameters are configured in `values.yaml` file. -- Each Flyte installation type should have separate `values-*.yaml` file: for sandbox, EKS and etc. The configuration in `values.yaml` and the choosen config `values-*.yaml` are merged when generating the deployment manifest. +- Each Flyte installation type should have separate `values-*.yaml` file: for sandbox, EKS and etc. The configuration in `values.yaml` and the chosen config `values-*.yaml` are merged when generating the deployment manifest. ## Values diff --git a/charts/flyte-deps/README.md.gotmpl b/charts/flyte-deps/README.md.gotmpl index 8309d1c1eb9..e866a13203c 100644 --- a/charts/flyte-deps/README.md.gotmpl +++ b/charts/flyte-deps/README.md.gotmpl @@ -45,6 +45,6 @@ helm upgrade -f values.yaml flyte . ### CONFIGURATION NOTES: - The docker images, their tags and other default parameters are configured in `values.yaml` file. -- Each Flyte installation type should have separate `values-*.yaml` file: for sandbox, EKS and etc. The configuration in `values.yaml` and the choosen config `values-*.yaml` are merged when generating the deployment manifest. +- Each Flyte installation type should have separate `values-*.yaml` file: for sandbox, EKS and etc. The configuration in `values.yaml` and the chosen config `values-*.yaml` are merged when generating the deployment manifest. {{ template "chart.valuesSection" . }} diff --git a/charts/flyte/README.md b/charts/flyte/README.md index 75f8912e0da..d5d152817bf 100644 --- a/charts/flyte/README.md +++ b/charts/flyte/README.md @@ -14,7 +14,7 @@ A Helm chart for Flyte Sandbox | https://helm.dask.org | daskoperator(dask-kubernetes-operator) | 2022.12.0 | | https://kubernetes.github.io/dashboard/ | kubernetes-dashboard | 4.0.2 | -**NOTE:** Flyte sandbox helm chart is depricated, From now follow the sandbox [docs](https://docs.flyte.org/en/latest/deployment/sandbox.html) for installing it on cloud +**NOTE:** Flyte sandbox helm chart is deprecated, From now follow the sandbox [docs](https://docs.flyte.org/en/latest/deployment/sandbox.html) for installing it on cloud ### SANDBOX INSTALLATION: - [Install helm 3](https://helm.sh/docs/intro/install/) @@ -47,7 +47,7 @@ helm upgrade -f values-sandbox.yaml flyte . ### CONFIGURATION NOTES: - The docker images, their tags and other default parameters are configured in `values.yaml` file. -- Each Flyte installation type should have separate `values-*.yaml` file: for sandbox, EKS and etc. The configuration in `values.yaml` and the choosen config `values-*.yaml` are merged when generating the deployment manifest. +- Each Flyte installation type should have separate `values-*.yaml` file: for sandbox, EKS and etc. The configuration in `values.yaml` and the chosen config `values-*.yaml` are merged when generating the deployment manifest. ## Values diff --git a/charts/flyte/README.md.gotmpl b/charts/flyte/README.md.gotmpl index 8d016e4fcaa..5c2de35f18c 100644 --- a/charts/flyte/README.md.gotmpl +++ b/charts/flyte/README.md.gotmpl @@ -13,7 +13,7 @@ {{ template "chart.requirementsSection" . }} -**NOTE:** Flyte sandbox helm chart is depricated, From now follow the sandbox [docs](https://docs.flyte.org/en/latest/deployment/sandbox.html) for installing it on cloud +**NOTE:** Flyte sandbox helm chart is deprecated, From now follow the sandbox [docs](https://docs.flyte.org/en/latest/deployment/sandbox.html) for installing it on cloud ### SANDBOX INSTALLATION: - [Install helm 3](https://helm.sh/docs/intro/install/) @@ -47,6 +47,6 @@ helm upgrade -f values-sandbox.yaml flyte . ### CONFIGURATION NOTES: - The docker images, their tags and other default parameters are configured in `values.yaml` file. -- Each Flyte installation type should have separate `values-*.yaml` file: for sandbox, EKS and etc. The configuration in `values.yaml` and the choosen config `values-*.yaml` are merged when generating the deployment manifest. +- Each Flyte installation type should have separate `values-*.yaml` file: for sandbox, EKS and etc. The configuration in `values.yaml` and the chosen config `values-*.yaml` are merged when generating the deployment manifest. {{ template "chart.valuesSection" . }} diff --git a/datacatalog/boilerplate/flyte/docker_build/docker_build.sh b/datacatalog/boilerplate/flyte/docker_build/docker_build.sh index a77f8a2d77c..817189aee17 100755 --- a/datacatalog/boilerplate/flyte/docker_build/docker_build.sh +++ b/datacatalog/boilerplate/flyte/docker_build/docker_build.sh @@ -43,7 +43,7 @@ fi docker build -t "$IMAGE_TAG_WITH_SHA" --target=${BUILD_PHASE} . echo "${IMAGE_TAG_WITH_SHA} built locally." -# if REGISTRY specified, push the images to the remote registy +# if REGISTRY specified, push the images to the remote registry if [ -n "$REGISTRY" ]; then if [ -n "${DOCKER_REGISTRY_PASSWORD}" ]; then diff --git a/datacatalog/boilerplate/flyte/github_workflows/Readme.rst b/datacatalog/boilerplate/flyte/github_workflows/Readme.rst index 905ddd81d80..f236923514f 100644 --- a/datacatalog/boilerplate/flyte/github_workflows/Readme.rst +++ b/datacatalog/boilerplate/flyte/github_workflows/Readme.rst @@ -7,7 +7,7 @@ Provides a two github actions workflows. Add ``flyteorg/github_workflows`` to your ``boilerplate/update.cfg`` file. -Add a github secret ``package_name`` with the name to use for publishing (e.g. ``flytepropeller``). Typicaly, this will be the same name as the repository. +Add a github secret ``package_name`` with the name to use for publishing (e.g. ``flytepropeller``). Typically, this will be the same name as the repository. *Note*: If you are working on a fork, include that prefix in your package name (``myfork/flytepropeller``). @@ -18,5 +18,5 @@ The actions will push to 2 repos: There are two workflows that get deployed: - 1. A workflow that runs on Pull Requests to build and push images to github registy tagged with the commit sha. + 1. A workflow that runs on Pull Requests to build and push images to github registry tagged with the commit sha. 2. A workflow that runs on master merges that bump the patch version of release tag, builds and pushes images to github registry tagged with the version, commit sha as well as "latest" diff --git a/datacatalog/boilerplate/flyte/golang_test_targets/go-gen.sh b/datacatalog/boilerplate/flyte/golang_test_targets/go-gen.sh index 54bd6af61b1..5ac17fa40ae 100755 --- a/datacatalog/boilerplate/flyte/golang_test_targets/go-gen.sh +++ b/datacatalog/boilerplate/flyte/golang_test_targets/go-gen.sh @@ -9,7 +9,7 @@ go generate ./... if [ -n "$DELTA_CHECK" ]; then DIRTY=$(git status --porcelain) if [ -n "$DIRTY" ]; then - echo "FAILED: Go code updated without commiting generated code." + echo "FAILED: Go code updated without committing generated code." echo "Ensure make generate has run and all changes are committed." DIFF=$(git diff) echo "diff detected: $DIFF" diff --git a/datacatalog/pkg/rpc/datacatalogservice/service.go b/datacatalog/pkg/rpc/datacatalogservice/service.go index a7ca391e2eb..7f4d5e20250 100644 --- a/datacatalog/pkg/rpc/datacatalogservice/service.go +++ b/datacatalog/pkg/rpc/datacatalogservice/service.go @@ -81,7 +81,7 @@ func NewDataCatalogService() *DataCatalogService { defer func() { if err := recover(); err != nil { catalogScope.MustNewCounter("initialization_panic", - "panics encountered initializating the datacatalog service").Inc() + "panics encountered initializing the datacatalog service").Inc() logger.Fatalf(context.Background(), fmt.Sprintf("caught panic: %v [%+v]", err, string(debug.Stack()))) } }() diff --git a/flyteadmin/boilerplate/flyte/docker_build/docker_build.sh b/flyteadmin/boilerplate/flyte/docker_build/docker_build.sh index a77f8a2d77c..817189aee17 100755 --- a/flyteadmin/boilerplate/flyte/docker_build/docker_build.sh +++ b/flyteadmin/boilerplate/flyte/docker_build/docker_build.sh @@ -43,7 +43,7 @@ fi docker build -t "$IMAGE_TAG_WITH_SHA" --target=${BUILD_PHASE} . echo "${IMAGE_TAG_WITH_SHA} built locally." -# if REGISTRY specified, push the images to the remote registy +# if REGISTRY specified, push the images to the remote registry if [ -n "$REGISTRY" ]; then if [ -n "${DOCKER_REGISTRY_PASSWORD}" ]; then diff --git a/flyteadmin/boilerplate/flyte/github_workflows/Readme.rst b/flyteadmin/boilerplate/flyte/github_workflows/Readme.rst index 905ddd81d80..f236923514f 100644 --- a/flyteadmin/boilerplate/flyte/github_workflows/Readme.rst +++ b/flyteadmin/boilerplate/flyte/github_workflows/Readme.rst @@ -7,7 +7,7 @@ Provides a two github actions workflows. Add ``flyteorg/github_workflows`` to your ``boilerplate/update.cfg`` file. -Add a github secret ``package_name`` with the name to use for publishing (e.g. ``flytepropeller``). Typicaly, this will be the same name as the repository. +Add a github secret ``package_name`` with the name to use for publishing (e.g. ``flytepropeller``). Typically, this will be the same name as the repository. *Note*: If you are working on a fork, include that prefix in your package name (``myfork/flytepropeller``). @@ -18,5 +18,5 @@ The actions will push to 2 repos: There are two workflows that get deployed: - 1. A workflow that runs on Pull Requests to build and push images to github registy tagged with the commit sha. + 1. A workflow that runs on Pull Requests to build and push images to github registry tagged with the commit sha. 2. A workflow that runs on master merges that bump the patch version of release tag, builds and pushes images to github registry tagged with the version, commit sha as well as "latest" diff --git a/flyteadmin/boilerplate/flyte/golang_test_targets/go-gen.sh b/flyteadmin/boilerplate/flyte/golang_test_targets/go-gen.sh index 54bd6af61b1..5ac17fa40ae 100755 --- a/flyteadmin/boilerplate/flyte/golang_test_targets/go-gen.sh +++ b/flyteadmin/boilerplate/flyte/golang_test_targets/go-gen.sh @@ -9,7 +9,7 @@ go generate ./... if [ -n "$DELTA_CHECK" ]; then DIRTY=$(git status --porcelain) if [ -n "$DIRTY" ]; then - echo "FAILED: Go code updated without commiting generated code." + echo "FAILED: Go code updated without committing generated code." echo "Ensure make generate has run and all changes are committed." DIFF=$(git diff) echo "diff detected: $DIFF" diff --git a/flyteadmin/pkg/common/filters.go b/flyteadmin/pkg/common/filters.go index 285b20d5c43..507984aa0d8 100644 --- a/flyteadmin/pkg/common/filters.go +++ b/flyteadmin/pkg/common/filters.go @@ -241,7 +241,7 @@ func customizeField(field string, entity Entity) string { } func customizeEntity(field string, entity Entity) Entity { - // NamedEntity is considered a single object, but the metdata + // NamedEntity is considered a single object, but the metadata // is stored using a different entity type. if entity == NamedEntity && entityMetadataFields[field] { return NamedEntityMetadata diff --git a/flyteadmin/pkg/common/flyte_url.go b/flyteadmin/pkg/common/flyte_url.go index a78689f3649..c4734b411cd 100644 --- a/flyteadmin/pkg/common/flyte_url.go +++ b/flyteadmin/pkg/common/flyte_url.go @@ -14,7 +14,7 @@ import ( type ArtifactType int -// The suffixes in these constants are used to match against the tail end of the flyte url, to keep tne flyte url simpler +// The suffixes in these constants are used to match against the tail end of the flyte url, to keep the flyte url simpler const ( ArtifactTypeUndefined ArtifactType = iota ArtifactTypeI // inputs diff --git a/flyteadmin/pkg/manager/impl/validation/task_validator.go b/flyteadmin/pkg/manager/impl/validation/task_validator.go index c8625ec4bd6..f8d8f5ae110 100644 --- a/flyteadmin/pkg/manager/impl/validation/task_validator.go +++ b/flyteadmin/pkg/manager/impl/validation/task_validator.go @@ -293,7 +293,7 @@ func validateTaskType(taskID core.Identifier, taskType string, whitelistConfig r } else if scope.Project != taskID.Project { continue } - // We have a potential match! Verify that this task type is approved given the specifity of the whitelist. + // We have a potential match! Verify that this task type is approved given the specificity of the whitelist. if scope.Domain == "" { // All domains for this project are whitelisted return nil diff --git a/flyteadmin/scheduler/core/gocron_scheduler.go b/flyteadmin/scheduler/core/gocron_scheduler.go index 0d0f78830bc..83ab6714532 100644 --- a/flyteadmin/scheduler/core/gocron_scheduler.go +++ b/flyteadmin/scheduler/core/gocron_scheduler.go @@ -122,7 +122,7 @@ func (g *GoCronScheduler) ScheduleJob(ctx context.Context, schedule models.Sched // Update the catchupFrom time as the lastExecTime. // Here lastExecTime is passed to this function only from BootStrapSchedulesFromSnapShot which is during bootup - // Once initialized we wont be changing the catchupTime until the next boot + // Once initialized we won't be changing the catchupTime until the next boot job := &GoCronJob{nameOfSchedule: nameOfSchedule, schedule: schedule, funcWithSchedule: funcWithSchedule, catchupFromTime: lastExecTime, lastExecTime: lastExecTime, ctx: ctx} @@ -276,7 +276,7 @@ func (g *GoCronScheduler) AddFixedIntervalJob(ctx context.Context, job *GoCronJo lastTime = *job.lastExecTime } entryID := g.cron.ScheduleTimedJob(cron.ConstantDelaySchedule{Delay: d}, jobFunc, lastTime) - // Update the enttry id in the job which is handle to be used for removal + // Update the entry id in the job which is handle to be used for removal job.entryID = entryID logger.Infof(ctx, "successfully added the fixed rate schedule %s to the scheduler for schedule %+v", job.nameOfSchedule, job.schedule) @@ -298,7 +298,7 @@ func (g *GoCronScheduler) AddCronJob(ctx context.Context, job *GoCronJob) error jobFunc = job.Run entryID, err := g.cron.AddTimedJob(job.schedule.CronExpression, jobFunc) - // Update the enttry id in the job which is handle to be used for removal + // Update the entry id in the job which is handle to be used for removal job.entryID = entryID if err == nil { logger.Infof(ctx, "successfully added the schedule %s to the scheduler for schedule %+v", diff --git a/flyteadmin/scheduler/doc.go b/flyteadmin/scheduler/doc.go index 48691e35e1d..9caf26d74bc 100644 --- a/flyteadmin/scheduler/doc.go +++ b/flyteadmin/scheduler/doc.go @@ -74,7 +74,7 @@ // During the discrepancy the executions won't be scheduled on admin once the bug(1354) is fixed. // // b) Case when scheduled time T1 execution fails. The goroutine executing for T1 will go through 30 repetitions before -// aborting the run. In such a scenario its possible that furture scheduled time T2 succeeds and gets executed successfully +// aborting the run. In such a scenario its possible that future scheduled time T2 succeeds and gets executed successfully // by the admin. i.e admin could execute the schedules in this order T2, T1. This is rare case though // // c) Case when the scheduler goes down then once it comes back up it will run catch up on all the schedules using diff --git a/flyteadmin/scheduler/repositories/gormimpl/schedule_entities_snapshot_repo.go b/flyteadmin/scheduler/repositories/gormimpl/schedule_entities_snapshot_repo.go index 1a2c8cffe7e..9bce77c6169 100644 --- a/flyteadmin/scheduler/repositories/gormimpl/schedule_entities_snapshot_repo.go +++ b/flyteadmin/scheduler/repositories/gormimpl/schedule_entities_snapshot_repo.go @@ -18,7 +18,7 @@ type ScheduleEntitiesSnapshotRepo struct { metrics gormMetrics } -// TODO : always overwrite the exisiting snapshot instead of creating new rows +// TODO : always overwrite the existing snapshot instead of creating new rows func (r *ScheduleEntitiesSnapshotRepo) Write(ctx context.Context, input models.ScheduleEntitiesSnapshot) error { timer := r.metrics.GetDuration.Start() tx := r.db.Omit("id").Create(&input) diff --git a/flyteadmin/scheduler/schedule_executor.go b/flyteadmin/scheduler/schedule_executor.go index 9a4afbe07f1..c8da9cf251f 100644 --- a/flyteadmin/scheduler/schedule_executor.go +++ b/flyteadmin/scheduler/schedule_executor.go @@ -78,7 +78,7 @@ func (w *ScheduledExecutor) Run(ctx context.Context) error { gcronUpdater := core.NewUpdater(w.db, gcronScheduler) go wait.UntilWithContext(updaterCtx, gcronUpdater.UpdateGoCronSchedules, scheduleUpdaterDuration) - // Catch up simulataneously on all the schedules in the scheduler + // Catch up simultaneously on all the schedules in the scheduler currTime := time.Now() af := futures.NewAsyncFuture(ctx, func(ctx context.Context) (interface{}, error) { return gcronScheduler.CatchupAll(ctx, currTime), nil diff --git a/flyteadmin/scheduler/snapshoter/versioned_snapshot.go b/flyteadmin/scheduler/snapshoter/versioned_snapshot.go index 72a82da8344..34d741ecb97 100644 --- a/flyteadmin/scheduler/snapshoter/versioned_snapshot.go +++ b/flyteadmin/scheduler/snapshoter/versioned_snapshot.go @@ -12,7 +12,7 @@ import ( // Including a version provides compatibility check type VersionedSnapshot struct { Version int - Ser []byte + Set []byte } func (s *VersionedSnapshot) WriteSnapshot(w io.Writer, snapshot Snapshot) error { @@ -21,7 +21,7 @@ func (s *VersionedSnapshot) WriteSnapshot(w io.Writer, snapshot Snapshot) error return err } s.Version = snapshot.GetVersion() - s.Ser = byteContents + s.Set = byteContents enc := gob.NewEncoder(w) return enc.Encode(s) } @@ -33,7 +33,7 @@ func (s *VersionedSnapshot) ReadSnapshot(r io.Reader) (Snapshot, error) { } if s.Version == 1 { snapShotV1 := SnapshotV1{LastTimes: map[string]*time.Time{}} - err = snapShotV1.Deserialize(s.Ser) + err = snapShotV1.Deserialize(s.Set) if err != nil { return nil, err } diff --git a/flyteadmin/script/integration/launch.sh b/flyteadmin/script/integration/launch.sh index 6c49af8af37..cac1409e121 100755 --- a/flyteadmin/script/integration/launch.sh +++ b/flyteadmin/script/integration/launch.sh @@ -34,7 +34,7 @@ docker run \ --env "DOCKERNETES_DEBUG=${DOCKERNETES_DEBUG}" \ lyft/dockernetes:1.10.1-v0.1 /sbin/init -# wait for the system to initalize, then run execute.sh +# wait for the system to initialize, then run execute.sh docker exec \ -it \ dockernetes /flyteadmin/script/integration/k8s/main.sh diff --git a/flytecopilot/boilerplate/flyte/docker_build/docker_build.sh b/flytecopilot/boilerplate/flyte/docker_build/docker_build.sh index a77f8a2d77c..817189aee17 100755 --- a/flytecopilot/boilerplate/flyte/docker_build/docker_build.sh +++ b/flytecopilot/boilerplate/flyte/docker_build/docker_build.sh @@ -43,7 +43,7 @@ fi docker build -t "$IMAGE_TAG_WITH_SHA" --target=${BUILD_PHASE} . echo "${IMAGE_TAG_WITH_SHA} built locally." -# if REGISTRY specified, push the images to the remote registy +# if REGISTRY specified, push the images to the remote registry if [ -n "$REGISTRY" ]; then if [ -n "${DOCKER_REGISTRY_PASSWORD}" ]; then diff --git a/flytecopilot/boilerplate/flyte/github_workflows/Readme.rst b/flytecopilot/boilerplate/flyte/github_workflows/Readme.rst index 905ddd81d80..f236923514f 100644 --- a/flytecopilot/boilerplate/flyte/github_workflows/Readme.rst +++ b/flytecopilot/boilerplate/flyte/github_workflows/Readme.rst @@ -7,7 +7,7 @@ Provides a two github actions workflows. Add ``flyteorg/github_workflows`` to your ``boilerplate/update.cfg`` file. -Add a github secret ``package_name`` with the name to use for publishing (e.g. ``flytepropeller``). Typicaly, this will be the same name as the repository. +Add a github secret ``package_name`` with the name to use for publishing (e.g. ``flytepropeller``). Typically, this will be the same name as the repository. *Note*: If you are working on a fork, include that prefix in your package name (``myfork/flytepropeller``). @@ -18,5 +18,5 @@ The actions will push to 2 repos: There are two workflows that get deployed: - 1. A workflow that runs on Pull Requests to build and push images to github registy tagged with the commit sha. + 1. A workflow that runs on Pull Requests to build and push images to github registry tagged with the commit sha. 2. A workflow that runs on master merges that bump the patch version of release tag, builds and pushes images to github registry tagged with the version, commit sha as well as "latest" diff --git a/flytecopilot/boilerplate/flyte/golang_test_targets/go-gen.sh b/flytecopilot/boilerplate/flyte/golang_test_targets/go-gen.sh index 54bd6af61b1..5ac17fa40ae 100755 --- a/flytecopilot/boilerplate/flyte/golang_test_targets/go-gen.sh +++ b/flytecopilot/boilerplate/flyte/golang_test_targets/go-gen.sh @@ -9,7 +9,7 @@ go generate ./... if [ -n "$DELTA_CHECK" ]; then DIRTY=$(git status --porcelain) if [ -n "$DIRTY" ]; then - echo "FAILED: Go code updated without commiting generated code." + echo "FAILED: Go code updated without committing generated code." echo "Ensure make generate has run and all changes are committed." DIFF=$(git diff) echo "diff detected: $DIFF" diff --git a/flyteidl/Makefile b/flyteidl/Makefile index 3b5c9fed464..a62ab95f9f3 100644 --- a/flyteidl/Makefile +++ b/flyteidl/Makefile @@ -28,7 +28,7 @@ test: install # ensures generate_protos script has been run .PHONY: test_unit test_unit: # we cannot use test_unit from go.mk because generated files contain commented import statements that - # go tries to intepret. So we need to use go list to get the packages that go understands. + # go tries to interpret. So we need to use go list to get the packages that go understands. go test -cover `go list ./...` -race .PHONY: build_python diff --git a/flyteidl/boilerplate/flyte/golang_test_targets/go-gen.sh b/flyteidl/boilerplate/flyte/golang_test_targets/go-gen.sh index 54bd6af61b1..5ac17fa40ae 100755 --- a/flyteidl/boilerplate/flyte/golang_test_targets/go-gen.sh +++ b/flyteidl/boilerplate/flyte/golang_test_targets/go-gen.sh @@ -9,7 +9,7 @@ go generate ./... if [ -n "$DELTA_CHECK" ]; then DIRTY=$(git status --porcelain) if [ -n "$DIRTY" ]; then - echo "FAILED: Go code updated without commiting generated code." + echo "FAILED: Go code updated without committing generated code." echo "Ensure make generate has run and all changes are committed." DIFF=$(git diff) echo "diff detected: $DIFF" diff --git a/flyteidl/clients/go/admin/client.go b/flyteidl/clients/go/admin/client.go index 830c86fe89b..05979ef7abb 100644 --- a/flyteidl/clients/go/admin/client.go +++ b/flyteidl/clients/go/admin/client.go @@ -142,7 +142,7 @@ func NewAdminConnection(ctx context.Context, cfg *Config, opts ...grpc.DialOptio } } if cfg.InsecureSkipVerify { - logger.Warnf(ctx, "using insecureSkipVerify. Server's certificate chain and host name wont be verified. Caution : shouldn't be used for production usecases") + logger.Warnf(ctx, "using insecureSkipVerify. Server's certificate chain and host name won't be verified. Caution : shouldn't be used for production usecases") tlsConfig.InsecureSkipVerify = true creds = credentials.NewTLS(tlsConfig) } else { diff --git a/flyteidl/clients/go/admin/client_builder.go b/flyteidl/clients/go/admin/client_builder.go index a1648073998..182ad4b6ef4 100644 --- a/flyteidl/clients/go/admin/client_builder.go +++ b/flyteidl/clients/go/admin/client_builder.go @@ -26,7 +26,7 @@ func (cb *ClientsetBuilder) WithConfig(config *Config) *ClientsetBuilder { return cb } -// WithTokenCache allows pluggable token cache implemetations. eg; flytectl uses keyring as tokenCache +// WithTokenCache allows pluggable token cache implementations. eg; flytectl uses keyring as tokenCache func (cb *ClientsetBuilder) WithTokenCache(tokenCache cache.TokenCache) *ClientsetBuilder { cb.tokenCache = tokenCache return cb diff --git a/flyteidl/clients/go/admin/client_test.go b/flyteidl/clients/go/admin/client_test.go index 017f4e8ff88..592dd2e2f66 100644 --- a/flyteidl/clients/go/admin/client_test.go +++ b/flyteidl/clients/go/admin/client_test.go @@ -123,7 +123,7 @@ func TestGetAuthenticationDialOptionClientSecret(t *testing.T) { PerRetryTimeout: config.Duration{Duration: 1 * time.Second}, } t.Run("legal", func(t *testing.T) { - metatdata := &service.OAuth2MetadataResponse{ + metadata := &service.OAuth2MetadataResponse{ TokenEndpoint: "http://localhost:8089/token", ScopesSupported: []string{"code", "all"}, } @@ -131,7 +131,7 @@ func TestGetAuthenticationDialOptionClientSecret(t *testing.T) { AuthorizationMetadataKey: "flyte_authorization", } mockAuthClient := new(mocks.AuthMetadataServiceClient) - mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metatdata, nil) + mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metadata, nil) mockAuthClient.OnGetPublicClientConfigMatch(mock.Anything, mock.Anything).Return(clientMetatadata, nil) dialOption, err := getAuthenticationDialOption(ctx, adminServiceConfig, nil, mockAuthClient) assert.Nil(t, dialOption) @@ -174,12 +174,12 @@ func TestGetAuthenticationDialOptionClientSecret(t *testing.T) { assert.EqualError(t, err, "failed to fetch client metadata. Error: expected err") }) t.Run("error during flyte client", func(t *testing.T) { - metatdata := &service.OAuth2MetadataResponse{ + metadata := &service.OAuth2MetadataResponse{ TokenEndpoint: "/token", ScopesSupported: []string{"code", "all"}, } mockAuthClient := new(mocks.AuthMetadataServiceClient) - mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metatdata, nil) + mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metadata, nil) mockAuthClient.OnGetPublicClientConfigMatch(mock.Anything, mock.Anything).Return(nil, fmt.Errorf("failed")) dialOption, err := getAuthenticationDialOption(ctx, adminServiceConfig, nil, mockAuthClient) assert.Nil(t, dialOption) @@ -193,7 +193,7 @@ func TestGetAuthenticationDialOptionClientSecret(t *testing.T) { PerRetryTimeout: config.Duration{Duration: 1 * time.Second}, } t.Run("incorrect client secret loc", func(t *testing.T) { - metatdata := &service.OAuth2MetadataResponse{ + metadata := &service.OAuth2MetadataResponse{ TokenEndpoint: "http://localhost:8089/token", ScopesSupported: []string{"code", "all"}, } @@ -201,7 +201,7 @@ func TestGetAuthenticationDialOptionClientSecret(t *testing.T) { AuthorizationMetadataKey: "flyte_authorization", } mockAuthClient := new(mocks.AuthMetadataServiceClient) - mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metatdata, nil) + mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metadata, nil) mockAuthClient.OnGetPublicClientConfigMatch(mock.Anything, mock.Anything).Return(clientMetatadata, nil) dialOption, err := getAuthenticationDialOption(ctx, incorrectSecretLocConfig, nil, mockAuthClient) assert.Nil(t, dialOption) @@ -219,7 +219,7 @@ func TestGetAuthenticationDialOptionPkce(t *testing.T) { AuthType: AuthTypePkce, PerRetryTimeout: config.Duration{Duration: 1 * time.Second}, } - metatdata := &service.OAuth2MetadataResponse{ + metadata := &service.OAuth2MetadataResponse{ TokenEndpoint: "http://localhost:8089/token", ScopesSupported: []string{"code", "all"}, } @@ -238,7 +238,7 @@ func TestGetAuthenticationDialOptionPkce(t *testing.T) { mockAuthClient := new(mocks.AuthMetadataServiceClient) mockTokenCache.OnGetTokenMatch().Return(&tokenData, nil) mockTokenCache.OnSaveTokenMatch(mock.Anything).Return(nil) - mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metatdata, nil) + mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metadata, nil) mockAuthClient.OnGetPublicClientConfigMatch(mock.Anything, mock.Anything).Return(clientMetatadata, nil) tokenSourceProvider, err := NewTokenSourceProvider(ctx, adminServiceConfig, mockTokenCache, mockAuthClient) assert.Nil(t, err) @@ -252,7 +252,7 @@ func TestGetAuthenticationDialOptionPkce(t *testing.T) { mockAuthClient := new(mocks.AuthMetadataServiceClient) mockTokenCache.OnGetTokenMatch().Return(&tokenData, nil) mockTokenCache.OnSaveTokenMatch(mock.Anything).Return(nil) - mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metatdata, nil) + mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metadata, nil) mockAuthClient.OnGetPublicClientConfigMatch(mock.Anything, mock.Anything).Return(clientMetatadata, nil) tokenSourceProvider, err := NewTokenSourceProvider(ctx, adminServiceConfig, mockTokenCache, mockAuthClient) assert.Nil(t, err) @@ -265,7 +265,7 @@ func TestGetAuthenticationDialOptionPkce(t *testing.T) { func Test_getPkceAuthTokenSource(t *testing.T) { ctx := context.Background() mockAuthClient := new(mocks.AuthMetadataServiceClient) - metatdata := &service.OAuth2MetadataResponse{ + metadata := &service.OAuth2MetadataResponse{ TokenEndpoint: "http://localhost:8089/token", ScopesSupported: []string{"code", "all"}, } @@ -275,7 +275,7 @@ func Test_getPkceAuthTokenSource(t *testing.T) { RedirectUri: "http://localhost:54546/callback", } - mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metatdata, nil) + mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metadata, nil) mockAuthClient.OnGetPublicClientConfigMatch(mock.Anything, mock.Anything).Return(clientMetatadata, nil) t.Run("cached token expired", func(t *testing.T) { diff --git a/flyteidl/clients/go/admin/tokenorchestrator/base_token_orchestrator_test.go b/flyteidl/clients/go/admin/tokenorchestrator/base_token_orchestrator_test.go index 136719a9330..38669a723b1 100644 --- a/flyteidl/clients/go/admin/tokenorchestrator/base_token_orchestrator_test.go +++ b/flyteidl/clients/go/admin/tokenorchestrator/base_token_orchestrator_test.go @@ -45,7 +45,7 @@ func TestRefreshTheToken(t *testing.T) { func TestFetchFromCache(t *testing.T) { ctx := context.Background() - metatdata := &service.OAuth2MetadataResponse{ + metadata := &service.OAuth2MetadataResponse{ TokenEndpoint: "/token", ScopesSupported: []string{"code", "all"}, } @@ -54,7 +54,7 @@ func TestFetchFromCache(t *testing.T) { RedirectUri: "http://localhost:8089/redirect", } mockAuthClient := new(mocks.AuthMetadataServiceClient) - mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metatdata, nil) + mockAuthClient.OnGetOAuth2MetadataMatch(mock.Anything, mock.Anything).Return(metadata, nil) mockAuthClient.OnGetPublicClientConfigMatch(mock.Anything, mock.Anything).Return(clientMetatadata, nil) t.Run("no token in cache", func(t *testing.T) { diff --git a/flyteidl/clients/go/coreutils/literals.go b/flyteidl/clients/go/coreutils/literals.go index fb210565479..dde34bcc385 100644 --- a/flyteidl/clients/go/coreutils/literals.go +++ b/flyteidl/clients/go/coreutils/literals.go @@ -588,7 +588,7 @@ func MakeLiteralForType(t *core.LiteralType, v interface{}) (*core.Literal, erro var newV string if v == nil { if len(t.GetEnumType().Values) == 0 { - return nil, fmt.Errorf("enum types need atleast one value") + return nil, fmt.Errorf("enum types need at least one value") } newV = t.GetEnumType().Values[0] } else { diff --git a/flyteidl/generate_protos.sh b/flyteidl/generate_protos.sh index 0f3cfb3c5fe..2cb240a0dd5 100755 --- a/flyteidl/generate_protos.sh +++ b/flyteidl/generate_protos.sh @@ -102,7 +102,7 @@ rm -f gen/pb-go/flyteidl/service/admin.pb.gw.go-e if [ -n "$DELTA_CHECK" ]; then DIRTY=$(git status --porcelain) if [ -n "$DIRTY" ]; then - echo "FAILED: Protos updated without commiting generated code." + echo "FAILED: Protos updated without committing generated code." echo "Ensure make generate has run and all changes are committed." DIFF=$(git diff) echo "diff detected: $DIFF" diff --git a/flyteidl/protos/docs/admin/admin.rst b/flyteidl/protos/docs/admin/admin.rst index 99b5b4096df..832f656c16f 100644 --- a/flyteidl/protos/docs/admin/admin.rst +++ b/flyteidl/protos/docs/admin/admin.rst @@ -1812,8 +1812,8 @@ User-provided launch plan definition and configuration values. "workflow_id", ":ref:`ref_flyteidl.core.Identifier`", "", "Reference to the Workflow template that the launch plan references" "entity_metadata", ":ref:`ref_flyteidl.admin.LaunchPlanMetadata`", "", "Metadata for the Launch Plan" - "default_inputs", ":ref:`ref_flyteidl.core.ParameterMap`", "", "Input values to be passed for the execution. These can be overriden when an execution is created with this launch plan." - "fixed_inputs", ":ref:`ref_flyteidl.core.LiteralMap`", "", "Fixed, non-overridable inputs for the Launch Plan. These can not be overriden when an execution is created with this launch plan." + "default_inputs", ":ref:`ref_flyteidl.core.ParameterMap`", "", "Input values to be passed for the execution. These can be overridden when an execution is created with this launch plan." + "fixed_inputs", ":ref:`ref_flyteidl.core.LiteralMap`", "", "Fixed, non-overridable inputs for the Launch Plan. These can not be overridden when an execution is created with this launch plan." "role", ":ref:`ref_string`", "", "**Deprecated.** String to indicate the role to use to execute the workflow underneath" "labels", ":ref:`ref_flyteidl.admin.Labels`", "", "Custom labels to be applied to the execution resource." "annotations", ":ref:`ref_flyteidl.admin.Annotations`", "", "Custom annotations to be applied to the execution resource." @@ -3324,7 +3324,7 @@ populated value (indicating the signal has been given). SignalGetOrCreateRequest ------------------------------------------------------------------ -SignalGetOrCreateRequest represents a request structure to retrive or create a signal. +SignalGetOrCreateRequest represents a request structure to retrieve or create a signal. See :ref:`ref_flyteidl.admin.Signal` for more details @@ -3907,7 +3907,7 @@ flyteidl/admin/workflow.proto CreateWorkflowFailureReason ------------------------------------------------------------------ -When a CreateWorkflowRequest failes due to matching id +When a CreateWorkflowRequest fails due to matching id diff --git a/flyteidl/protos/docs/datacatalog/datacatalog.rst b/flyteidl/protos/docs/datacatalog/datacatalog.rst index a699b883782..6ca4328f95f 100644 --- a/flyteidl/protos/docs/datacatalog/datacatalog.rst +++ b/flyteidl/protos/docs/datacatalog/datacatalog.rst @@ -378,7 +378,7 @@ Dataset. GetOrExtendReservationRequest ------------------------------------------------------------------ -Try to acquire or extend an artifact reservation. If an active reservation exists, retreive that instance. +Try to acquire or extend an artifact reservation. If an active reservation exists, retrieve that instance. diff --git a/flyteidl/protos/docs/service/index.rst b/flyteidl/protos/docs/service/index.rst index 8bcf45d9c45..6c5bebe6b28 100644 --- a/flyteidl/protos/docs/service/index.rst +++ b/flyteidl/protos/docs/service/index.rst @@ -1,7 +1,7 @@ REST and gRPC interface for the Flyte Admin Service =================================================== -This section provides all endpoint defintions that are implemented by the Admin service. +This section provides all endpoint definitions that are implemented by the Admin service. `Admin service raw protos `__ diff --git a/flyteidl/protos/flyteidl/admin/launch_plan.proto b/flyteidl/protos/flyteidl/admin/launch_plan.proto index 2164be31fd2..fdba6c1b7b6 100644 --- a/flyteidl/protos/flyteidl/admin/launch_plan.proto +++ b/flyteidl/protos/flyteidl/admin/launch_plan.proto @@ -85,11 +85,11 @@ message LaunchPlanSpec { LaunchPlanMetadata entity_metadata = 2; // Input values to be passed for the execution. - // These can be overriden when an execution is created with this launch plan. + // These can be overridden when an execution is created with this launch plan. core.ParameterMap default_inputs = 3; // Fixed, non-overridable inputs for the Launch Plan. - // These can not be overriden when an execution is created with this launch plan. + // These can not be overridden when an execution is created with this launch plan. core.LiteralMap fixed_inputs = 4; // String to indicate the role to use to execute the workflow underneath diff --git a/flyteidl/protos/flyteidl/admin/signal.proto b/flyteidl/protos/flyteidl/admin/signal.proto index 8fc1c83e584..105ae805246 100644 --- a/flyteidl/protos/flyteidl/admin/signal.proto +++ b/flyteidl/protos/flyteidl/admin/signal.proto @@ -8,7 +8,7 @@ import "flyteidl/core/identifier.proto"; import "flyteidl/core/literals.proto"; import "flyteidl/core/types.proto"; -// SignalGetOrCreateRequest represents a request structure to retrive or create a signal. +// SignalGetOrCreateRequest represents a request structure to retrieve or create a signal. // See :ref:`ref_flyteidl.admin.Signal` for more details message SignalGetOrCreateRequest { // A unique identifier for the requested signal. diff --git a/flyteidl/protos/flyteidl/admin/workflow.proto b/flyteidl/protos/flyteidl/admin/workflow.proto index b768cf9601c..a5e89f508ec 100644 --- a/flyteidl/protos/flyteidl/admin/workflow.proto +++ b/flyteidl/protos/flyteidl/admin/workflow.proto @@ -83,7 +83,7 @@ message WorkflowErrorExistsIdenticalStructure { core.Identifier id = 1; } -// When a CreateWorkflowRequest failes due to matching id +// When a CreateWorkflowRequest fails due to matching id message CreateWorkflowFailureReason { oneof reason { WorkflowErrorExistsDifferentStructure exists_different_structure = 1; diff --git a/flyteidl/protos/flyteidl/datacatalog/datacatalog.proto b/flyteidl/protos/flyteidl/datacatalog/datacatalog.proto index 6f059159f35..36e908c1e81 100644 --- a/flyteidl/protos/flyteidl/datacatalog/datacatalog.proto +++ b/flyteidl/protos/flyteidl/datacatalog/datacatalog.proto @@ -210,7 +210,7 @@ message ReservationID { string tag_name = 2; } -// Try to acquire or extend an artifact reservation. If an active reservation exists, retreive that instance. +// Try to acquire or extend an artifact reservation. If an active reservation exists, retrieve that instance. message GetOrExtendReservationRequest { // The unique ID for the reservation ReservationID reservation_id = 1; diff --git a/flyteidl/protos/flyteidl/service/dataproxy.proto b/flyteidl/protos/flyteidl/service/dataproxy.proto index 8972d4f6de7..5d533a414a0 100644 --- a/flyteidl/protos/flyteidl/service/dataproxy.proto +++ b/flyteidl/protos/flyteidl/service/dataproxy.proto @@ -129,7 +129,7 @@ message PreSignedURLs { // General request artifact to retrieve data from a Flyte artifact url. message GetDataRequest { // A unique identifier in the form of flyte:// that uniquely, for a given Flyte - // backend, identifies a Flyte artifact ([i]nput, [o]utput, flyte [d]eck, etc.). + // backend, identifies a Flyte artifact ([i]nput, [o]output, flyte [d]eck, etc.). // e.g. flyte://v1/proj/development/execid/n2/0/i (for 0th task execution attempt input) // flyte://v1/proj/development/execid/n2/i (for node execution input) // flyte://v1/proj/development/execid/n2/o/o3 (the o3 output of the second node) diff --git a/flyteplugins/boilerplate/flyte/docker_build/docker_build.sh b/flyteplugins/boilerplate/flyte/docker_build/docker_build.sh index a77f8a2d77c..817189aee17 100755 --- a/flyteplugins/boilerplate/flyte/docker_build/docker_build.sh +++ b/flyteplugins/boilerplate/flyte/docker_build/docker_build.sh @@ -43,7 +43,7 @@ fi docker build -t "$IMAGE_TAG_WITH_SHA" --target=${BUILD_PHASE} . echo "${IMAGE_TAG_WITH_SHA} built locally." -# if REGISTRY specified, push the images to the remote registy +# if REGISTRY specified, push the images to the remote registry if [ -n "$REGISTRY" ]; then if [ -n "${DOCKER_REGISTRY_PASSWORD}" ]; then diff --git a/flyteplugins/boilerplate/flyte/golang_test_targets/go-gen.sh b/flyteplugins/boilerplate/flyte/golang_test_targets/go-gen.sh index 54bd6af61b1..5ac17fa40ae 100755 --- a/flyteplugins/boilerplate/flyte/golang_test_targets/go-gen.sh +++ b/flyteplugins/boilerplate/flyte/golang_test_targets/go-gen.sh @@ -9,7 +9,7 @@ go generate ./... if [ -n "$DELTA_CHECK" ]; then DIRTY=$(git status --porcelain) if [ -n "$DIRTY" ]; then - echo "FAILED: Go code updated without commiting generated code." + echo "FAILED: Go code updated without committing generated code." echo "Ensure make generate has run and all changes are committed." DIFF=$(git diff) echo "diff detected: $DIFF" diff --git a/flyteplugins/go/tasks/pluginmachinery/flytek8s/config/config.go b/flyteplugins/go/tasks/pluginmachinery/flytek8s/config/config.go index daaae6e6818..057b2fe145a 100755 --- a/flyteplugins/go/tasks/pluginmachinery/flytek8s/config/config.go +++ b/flyteplugins/go/tasks/pluginmachinery/flytek8s/config/config.go @@ -89,7 +89,7 @@ type K8sPluginConfig struct { // default memory requests for a container DefaultMemoryRequest resource.Quantity `json:"default-memory" pflag:",Defines a default value for memory for containers if not specified."` - // Default Tolerations that will be added to every Pod that is created by Flyte. These can be used in heterogenous clusters, where one wishes to keep all pods created by Flyte on a separate + // Default Tolerations that will be added to every Pod that is created by Flyte. These can be used in heterogeneous clusters, where one wishes to keep all pods created by Flyte on a separate // set of nodes. DefaultTolerations []v1.Toleration `json:"default-tolerations" pflag:"-,Tolerations to be applied for every node that is launched by Flyte. Useful in non dedicated flyte clusters"` // Default Node Selector Labels for pods. These NodeSelector labels are added to all pods, created by Flyte, unless they are marked as interruptible (default of interruptible are different). @@ -101,7 +101,7 @@ type K8sPluginConfig struct { SchedulerName string `json:"scheduler-name" pflag:",Defines scheduler name."` // ----------------------------------------------------------------- - // Special tolerations and node selector for Interruptible tasks. This allows scheduling interruptible tasks onto specific hardward + // Special tolerations and node selector for Interruptible tasks. This allows scheduling interruptible tasks onto specific hardware // Tolerations for interruptible k8s pods: These tolerations are added to the pods that can tolerate getting evicted from a node. We // can leverage this for better bin-packing and using low-reliability cheaper machines. diff --git a/flyteplugins/go/tasks/pluginmachinery/registry.go b/flyteplugins/go/tasks/pluginmachinery/registry.go index 76e7844bbd5..633c8a676fc 100644 --- a/flyteplugins/go/tasks/pluginmachinery/registry.go +++ b/flyteplugins/go/tasks/pluginmachinery/registry.go @@ -56,7 +56,7 @@ func (p *taskPluginRegistry) RegisterK8sPlugin(info k8s.PluginEntry) { } if len(info.RegisteredTaskTypes) == 0 { - logger.Panicf(context.TODO(), "K8s AsyncPlugin should be registered to handle atleast one task type") + logger.Panicf(context.TODO(), "K8s AsyncPlugin should be registered to handle at least one task type") } if info.Plugin == nil { @@ -78,7 +78,7 @@ func (p *taskPluginRegistry) RegisterCorePlugin(info core.PluginEntry) { logger.Panicf(context.TODO(), "ID is required attribute for k8s plugin") } if len(info.RegisteredTaskTypes) == 0 { - logger.Panicf(context.TODO(), "AsyncPlugin should be registered to handle atleast one task type") + logger.Panicf(context.TODO(), "AsyncPlugin should be registered to handle at least one task type") } if info.LoadPlugin == nil { logger.Panicf(context.TODO(), "PluginLoader cannot be nil") diff --git a/flyteplugins/go/tasks/plugins/array/arraystatus/status.go b/flyteplugins/go/tasks/plugins/array/arraystatus/status.go index 1a5d745254d..e74b952a1cc 100644 --- a/flyteplugins/go/tasks/plugins/array/arraystatus/status.go +++ b/flyteplugins/go/tasks/plugins/array/arraystatus/status.go @@ -23,7 +23,7 @@ type ArrayStatus struct { Detailed bitarray.CompactArray `json:"details"` } -// HashCode computes a hash of the phase indicies stored in the Detailed array to uniquely represent +// HashCode computes a hash of the phase indices stored in the Detailed array to uniquely represent // a collection of subtask phases. func (a ArrayStatus) HashCode() (uint64, error) { hash := fnv.New64() diff --git a/flyteplugins/go/tasks/plugins/array/k8s/subtask.go b/flyteplugins/go/tasks/plugins/array/k8s/subtask.go index aefb4ac5bb6..150c9865ab9 100644 --- a/flyteplugins/go/tasks/plugins/array/k8s/subtask.go +++ b/flyteplugins/go/tasks/plugins/array/k8s/subtask.go @@ -80,7 +80,7 @@ func addMetadata(stCtx SubTaskExecutionContext, cfg *Config, k8sPluginCfg *confi } // The legacy map task implemented these as overrides so they were left as such. May want to - // revist whether they would serve better as appends. + // revisit whether they would serve better as appends. if len(cfg.NodeSelector) != 0 { pod.Spec.NodeSelector = cfg.NodeSelector } diff --git a/flyteplugins/go/tasks/plugins/k8s/kfoperators/mpi/mpi_test.go b/flyteplugins/go/tasks/plugins/k8s/kfoperators/mpi/mpi_test.go index 4a442b29ad2..b477ad93ece 100644 --- a/flyteplugins/go/tasks/plugins/k8s/kfoperators/mpi/mpi_test.go +++ b/flyteplugins/go/tasks/plugins/k8s/kfoperators/mpi/mpi_test.go @@ -83,7 +83,7 @@ func dummyMPITaskTemplate(id string, args ...interface{}) *core.TaskTemplate { var mpiCustomObj = t mpiObjJSON, err = utils.MarshalToString(mpiCustomObj) default: - err = fmt.Errorf("Unkonw input type %T", t) + err = fmt.Errorf("Unknown input type %T", t) } } diff --git a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go index 69b2cb55635..4446f5b3ad7 100644 --- a/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go +++ b/flyteplugins/go/tasks/plugins/k8s/kfoperators/pytorch/pytorch_test.go @@ -92,7 +92,7 @@ func dummyPytorchTaskTemplate(id string, args ...interface{}) *core.TaskTemplate var pytorchCustomObj = t ptObjJSON, err = utils.MarshalToString(pytorchCustomObj) default: - err = fmt.Errorf("Unkonw input type %T", t) + err = fmt.Errorf("Unknown input type %T", t) } } diff --git a/flyteplugins/go/tasks/plugins/k8s/kfoperators/tensorflow/tensorflow_test.go b/flyteplugins/go/tasks/plugins/k8s/kfoperators/tensorflow/tensorflow_test.go index 5ec5658d828..f0013bbf928 100644 --- a/flyteplugins/go/tasks/plugins/k8s/kfoperators/tensorflow/tensorflow_test.go +++ b/flyteplugins/go/tasks/plugins/k8s/kfoperators/tensorflow/tensorflow_test.go @@ -85,7 +85,7 @@ func dummyTensorFlowTaskTemplate(id string, args ...interface{}) *core.TaskTempl var tensorflowCustomObj = t tfObjJSON, err = utils.MarshalToString(tensorflowCustomObj) default: - err = fmt.Errorf("Unkonw input type %T", t) + err = fmt.Errorf("Unknown input type %T", t) } } diff --git a/flyteplugins/tests/end_to_end.go b/flyteplugins/tests/end_to_end.go index dac473dfdc7..0271ba86d7e 100644 --- a/flyteplugins/tests/end_to_end.go +++ b/flyteplugins/tests/end_to_end.go @@ -209,7 +209,7 @@ func RunPluginEndToEndTest(t *testing.T, executor pluginCore.Plugin, template *i or := args.Get(2).(io.OutputReader) o, ee, err := or.Read(ctx) assert.NoError(t, err) - // TODO: Outputing error is not yet supported. + // TODO: Outputting error is not yet supported. assert.Nil(t, ee) catData.Store(key, o) }) diff --git a/flytepropeller/boilerplate/flyte/docker_build/docker_build.sh b/flytepropeller/boilerplate/flyte/docker_build/docker_build.sh index a77f8a2d77c..817189aee17 100755 --- a/flytepropeller/boilerplate/flyte/docker_build/docker_build.sh +++ b/flytepropeller/boilerplate/flyte/docker_build/docker_build.sh @@ -43,7 +43,7 @@ fi docker build -t "$IMAGE_TAG_WITH_SHA" --target=${BUILD_PHASE} . echo "${IMAGE_TAG_WITH_SHA} built locally." -# if REGISTRY specified, push the images to the remote registy +# if REGISTRY specified, push the images to the remote registry if [ -n "$REGISTRY" ]; then if [ -n "${DOCKER_REGISTRY_PASSWORD}" ]; then diff --git a/flytepropeller/boilerplate/flyte/golang_test_targets/go-gen.sh b/flytepropeller/boilerplate/flyte/golang_test_targets/go-gen.sh index 54bd6af61b1..5ac17fa40ae 100755 --- a/flytepropeller/boilerplate/flyte/golang_test_targets/go-gen.sh +++ b/flytepropeller/boilerplate/flyte/golang_test_targets/go-gen.sh @@ -9,7 +9,7 @@ go generate ./... if [ -n "$DELTA_CHECK" ]; then DIRTY=$(git status --porcelain) if [ -n "$DIRTY" ]; then - echo "FAILED: Go code updated without commiting generated code." + echo "FAILED: Go code updated without committing generated code." echo "Ensure make generate has run and all changes are committed." DIFF=$(git diff) echo "diff detected: $DIFF" diff --git a/flytepropeller/cmd/manager/cmd/root.go b/flytepropeller/cmd/manager/cmd/root.go index fc3da4af620..8cf2b939e5d 100644 --- a/flytepropeller/cmd/manager/cmd/root.go +++ b/flytepropeller/cmd/manager/cmd/root.go @@ -73,7 +73,7 @@ Sample configuration, illustrating 3 separate sharding techniques, is provided b - flyteexamples - flytelab - # process the 'production' domain on a single instace and all other domains on another + # process the 'production' domain on a single instance and all other domains on another type: domain enableUncoveredReplica: true replicas: diff --git a/flytepropeller/manager/doc.go b/flytepropeller/manager/doc.go index 648b2079a45..025b60cf1f8 100644 --- a/flytepropeller/manager/doc.go +++ b/flytepropeller/manager/doc.go @@ -3,7 +3,7 @@ Package manager introduces a FlytePropeller Manager implementation that enables The FlytePropeller Manager manages a collection of FlytePropeller instances to effectively distribute load. Each managed FlytePropller instance is created as a k8s pod using a configurable k8s PodTemplate resource. The FlytePropeller Manager use a control loop to periodically check the status of managed FlytePropeller instances and creates, updates, or deletes pods as required. It is important to note that if the FlytePropeller Manager fails, managed instances are left running. This is in effort to ensure progress continues in evaluating FlyteWorkflow CRDs. -FlytePropeller Manager is configured at the root of the FlytePropeller configurtion. Below is an example of the variety of configuration options along with succinct associated descriptions for each field: +FlytePropeller Manager is configured at the root of the FlytePropeller configuration. Below is an example of the variety of configuration options along with succinct associated descriptions for each field: manager: pod-application: "flytepropeller" # application name for managed pods @@ -20,7 +20,7 @@ FlytePropeller Manager handles dynamic updates to both the k8s PodTemplate and s Flyte defines a variety of Shard Strategies for configuring how FlyteWorkflows are sharded. These options may include the shard type (ex. hash, project, or domain) along with the number of shards or the distribution of project / domain IDs over shards. -Internally, FlyteWorkflow CRDs are initialized with k8s labels for project, domain, and a shard-key. The project and domain label values are associated with the environment of the registered workflow. The shard-key value is a range-bounded hash over various components of the FlyteWorkflow metadata, currently the keyspace range is defined as [0,32). A sharded Flyte deployment ensures deterministic FlyteWorkflow evalutions by setting disjoint k8s label selectors, based on the aforementioned labels, on each managed FlytePropeller instance. This ensures that only a single FlytePropeller instance is responsible for processing each FlyteWorkflow. +Internally, FlyteWorkflow CRDs are initialized with k8s labels for project, domain, and a shard-key. The project and domain label values are associated with the environment of the registered workflow. The shard-key value is a range-bounded hash over various components of the FlyteWorkflow metadata, currently the keyspace range is defined as [0,32). A sharded Flyte deployment ensures deterministic FlyteWorkflow evaluations by setting disjoint k8s label selectors, based on the aforementioned labels, on each managed FlytePropeller instance. This ensures that only a single FlytePropeller instance is responsible for processing each FlyteWorkflow. The Hash Shard Strategy, denoted by "type: hash" in the configuration below, uses consistent hashing to evenly distribute FlyteWorkflows over managed FlytePropeller instances. This is achieved by partitioning the keyspace (i.e. [0,32)) into a collection of disjoint ranges and using label selectors to assign those ranges to managed FlytePropeller instances. For example, with "shard-count: 4" the first instance is responsible for FlyteWorkflows with "shard-keys" in the range [0,8), the second [8,16), the third [16,24), and the fourth [24,32). It may be useful to note that the default shard type is "hash", so it will be implicitly defined if otherwise left out of the configuration. An example configuration for the Hash Shard Strategy is provided below: diff --git a/flytepropeller/manager/shardstrategy/environment.go b/flytepropeller/manager/shardstrategy/environment.go index e6b819cbd21..c7dec1bfff0 100644 --- a/flytepropeller/manager/shardstrategy/environment.go +++ b/flytepropeller/manager/shardstrategy/environment.go @@ -8,7 +8,7 @@ import ( v1 "k8s.io/api/core/v1" ) -// EnvironmentShardStrategy assigns either project or domain identifers to individual +// EnvironmentShardStrategy assigns either project or domain identifiers to individual // FlytePropeller instances to determine FlyteWorkflow processing responsibility. type EnvironmentShardStrategy struct { EnvType environmentType diff --git a/flytepropeller/pkg/apis/flyteworkflow/v1alpha1/execution_config.go b/flytepropeller/pkg/apis/flyteworkflow/v1alpha1/execution_config.go index 0068b7a6320..12e5191b232 100644 --- a/flytepropeller/pkg/apis/flyteworkflow/v1alpha1/execution_config.go +++ b/flytepropeller/pkg/apis/flyteworkflow/v1alpha1/execution_config.go @@ -32,7 +32,7 @@ type ExecutionConfig struct { Interruptible *bool // Defines whether a workflow should skip all its cached results and re-compute its output, overwriting any already stored data. OverwriteCache bool - // Defines a map of environment varable name / value pairs that are applied to all tasks. + // Defines a map of environment variable name / value pairs that are applied to all tasks. EnvironmentVariables map[string]string } diff --git a/flytepropeller/pkg/apis/flyteworkflow/v1alpha1/nodes.go b/flytepropeller/pkg/apis/flyteworkflow/v1alpha1/nodes.go index 21c8b026105..3ff76f3d53c 100644 --- a/flytepropeller/pkg/apis/flyteworkflow/v1alpha1/nodes.go +++ b/flytepropeller/pkg/apis/flyteworkflow/v1alpha1/nodes.go @@ -64,9 +64,9 @@ func (in *Binding) DeepCopyInto(out *Binding) { // Strategy to be used to Retry a node that is in RetryableFailure state type RetryStrategy struct { - // MinAttempts implies the atleast n attempts to try this node before giving up. The atleast here is because we may + // MinAttempts implies the at least n attempts to try this node before giving up. The at least here is because we may // fail to write the attempt information and end up retrying again. - // Also `0` and `1` both mean atleast one attempt will be done. 0 is a degenerate case. + // Also `0` and `1` both mean at least one attempt will be done. 0 is a degenerate case. MinAttempts *int `json:"minAttempts"` // TODO Add retrydelay? } diff --git a/flytepropeller/pkg/compiler/errors/compiler_errors.go b/flytepropeller/pkg/compiler/errors/compiler_errors.go index b73b1927d64..f3fd02f96f2 100755 --- a/flytepropeller/pkg/compiler/errors/compiler_errors.go +++ b/flytepropeller/pkg/compiler/errors/compiler_errors.go @@ -73,7 +73,7 @@ const ( // An unknown error occurred while building the workflow. WorkflowBuildError ErrorCode = "WorkflowBuildError" - // A value is expected to be unique but wasnt. + // A value is expected to be unique but wasn't. ValueCollision ErrorCode = "ValueCollision" // A value isn't on the right syntax. diff --git a/flytepropeller/pkg/compiler/transformers/k8s/utils.go b/flytepropeller/pkg/compiler/transformers/k8s/utils.go index 9b4bd638248..85a8c405649 100644 --- a/flytepropeller/pkg/compiler/transformers/k8s/utils.go +++ b/flytepropeller/pkg/compiler/transformers/k8s/utils.go @@ -89,7 +89,7 @@ func StripTypeMetadata(t *core.LiteralType) *core.LiteralType { c.Annotation = nil // Note that we cannot strip `Structure` from the type because the dynamic node output type is used to validate the // interface of the dynamically compiled workflow. `Structure` is used to extend type checking information on - // differnent Flyte types and is therefore required to ensure correct type validation. + // different Flyte types and is therefore required to ensure correct type validation. switch underlyingType := c.Type.(type) { case *core.LiteralType_UnionType: diff --git a/flytepropeller/pkg/compiler/validators/utils_test.go b/flytepropeller/pkg/compiler/validators/utils_test.go index 3557ba0ec8b..646b35d9e85 100644 --- a/flytepropeller/pkg/compiler/validators/utils_test.go +++ b/flytepropeller/pkg/compiler/validators/utils_test.go @@ -14,7 +14,7 @@ func TestLiteralTypeForLiterals(t *testing.T) { assert.Equal(t, core.SimpleType_NONE.String(), lt.GetSimple().String()) }) - t.Run("homogenous", func(t *testing.T) { + t.Run("homogeneous", func(t *testing.T) { lt := literalTypeForLiterals([]*core.Literal{ coreutils.MustMakeLiteral(5), coreutils.MustMakeLiteral(0), diff --git a/flytepropeller/pkg/controller/handler.go b/flytepropeller/pkg/controller/handler.go index a16755787d5..0aac36a04cc 100644 --- a/flytepropeller/pkg/controller/handler.go +++ b/flytepropeller/pkg/controller/handler.go @@ -267,7 +267,7 @@ func (p *Propeller) Handle(ctx context.Context, namespace, name string) error { } if err != nil { - // NOTE We are overriding the deepcopy here, as we are essentially ingnoring all mutations + // NOTE We are overriding the deepcopy here, as we are essentially ignoring all mutations // We only want to increase failed attempts and discard any other partial changes to the CRD. mutatedWf = RecordSystemError(w, err) p.metrics.SystemError.Inc(ctx) diff --git a/flytepropeller/pkg/controller/nodes/array/handler.go b/flytepropeller/pkg/controller/nodes/array/handler.go index a8cc13c8527..7e4c6d1adfb 100644 --- a/flytepropeller/pkg/controller/nodes/array/handler.go +++ b/flytepropeller/pkg/controller/nodes/array/handler.go @@ -369,7 +369,7 @@ func (a *arrayNodeHandler) Handle(ctx context.Context, nCtx interfaces.NodeExecu } if len(arrayNodeState.SubNodePhases.GetItems())-failedCount < minSuccesses { - // no chance to reach the mininum number of successes + // no chance to reach the minimum number of successes arrayNodeState.Phase = v1alpha1.ArrayNodePhaseFailing } else if successCount >= minSuccesses && runningCount == 0 { // wait until all tasks have completed before declaring success @@ -480,7 +480,7 @@ func (a *arrayNodeHandler) Handle(ctx context.Context, nCtx interfaces.NodeExecu // need to increment taskPhaseVersion if arrayNodeState.Phase does not change, otherwise // reset to 0. by incrementing this always we report an event and ensure processing - // everytime the ArrayNode is evaluated. if this overhead becomes too large, we will need + // every time the ArrayNode is evaluated. if this overhead becomes too large, we will need // to revisit and only increment when any subNode state changes. if currentArrayNodePhase != arrayNodeState.Phase { arrayNodeState.TaskPhaseVersion = 0 @@ -547,7 +547,7 @@ func (a *arrayNodeHandler) buildArrayNodeContext(ctx context.Context, nCtx inter nodePhase := v1alpha1.NodePhase(arrayNodeState.SubNodePhases.GetItem(subNodeIndex)) taskPhase := int(arrayNodeState.SubNodeTaskPhases.GetItem(subNodeIndex)) - // need to initialize the inputReader everytime to ensure TaskHandler can access for cache lookups / population + // need to initialize the inputReader every time to ensure TaskHandler can access for cache lookups / population inputLiteralMap, err := constructLiteralMap(ctx, nCtx.InputReader(), subNodeIndex) if err != nil { return nil, nil, nil, nil, nil, nil, nil, err diff --git a/flytepropeller/pkg/controller/nodes/array/handler_test.go b/flytepropeller/pkg/controller/nodes/array/handler_test.go index 3a5f84965ba..821bcd2d061 100644 --- a/flytepropeller/pkg/controller/nodes/array/handler_test.go +++ b/flytepropeller/pkg/controller/nodes/array/handler_test.go @@ -205,7 +205,7 @@ func TestAbort(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - // initailize universal variables + // initialize universal variables literalMap := convertMapToArrayLiterals(test.inputMap) size := -1 @@ -301,7 +301,7 @@ func TestFinalize(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - // initailize universal variables + // initialize universal variables literalMap := convertMapToArrayLiterals(test.inputMap) size := -1 @@ -440,7 +440,7 @@ func TestHandleArrayNodePhaseExecuting(t *testing.T) { ctx := context.Background() minSuccessRatio := float32(0.5) - // initailize universal variables + // initialize universal variables inputMap := map[string][]int64{ "foo": []int64{0, 1}, "bar": []int64{2, 3}, diff --git a/flytepropeller/pkg/controller/nodes/cache.go b/flytepropeller/pkg/controller/nodes/cache.go index fbae02b1665..dba10c10bb3 100644 --- a/flytepropeller/pkg/controller/nodes/cache.go +++ b/flytepropeller/pkg/controller/nodes/cache.go @@ -133,7 +133,7 @@ func (n *nodeExecutor) CheckCatalogCache(ctx context.Context, nCtx interfaces.No } // GetOrExtendCatalogReservation attempts to acquire an artifact reservation if the task is -// cachable and cache serializable. If the reservation already exists for this owner, the +// cacheable and cache serializable. If the reservation already exists for this owner, the // reservation is extended. func (n *nodeExecutor) GetOrExtendCatalogReservation(ctx context.Context, nCtx interfaces.NodeExecutionContext, cacheHandler interfaces.CacheableNodeHandler, heartbeatInterval time.Duration) (catalog.ReservationEntry, error) { @@ -169,7 +169,7 @@ func (n *nodeExecutor) GetOrExtendCatalogReservation(ctx context.Context, nCtx i reservation.HeartbeatInterval.AsDuration(), reservation.OwnerId, status), nil } -// ReleaseCatalogReservation attempts to release an artifact reservation if the task is cachable +// ReleaseCatalogReservation attempts to release an artifact reservation if the task is cacheable // and cache serializable. If the reservation does not exist for this owner (e.x. it never existed // or has been acquired by another owner) this call is still successful. func (n *nodeExecutor) ReleaseCatalogReservation(ctx context.Context, nCtx interfaces.NodeExecutionContext, diff --git a/flytepropeller/pkg/controller/nodes/cache_test.go b/flytepropeller/pkg/controller/nodes/cache_test.go index 6d9c4fce9f2..5a585d06017 100644 --- a/flytepropeller/pkg/controller/nodes/cache_test.go +++ b/flytepropeller/pkg/controller/nodes/cache_test.go @@ -203,7 +203,7 @@ func TestCheckCatalogCache(t *testing.T) { catalogSkipCount: labeled.NewCounter("discovery_skip_count", "Task cached skipped in Discovery", testScope), catalogPutSuccessCount: labeled.NewCounter("discovery_put_success_count", "Discovery Put success count", testScope), catalogPutFailureCount: labeled.NewCounter("discovery_put_failure_count", "Discovery Put failure count", testScope), - catalogGetFailureCount: labeled.NewCounter("discovery_get_failure_count", "Discovery Get faillure count", testScope), + catalogGetFailureCount: labeled.NewCounter("discovery_get_failure_count", "Discovery Get failure count", testScope), reservationGetFailureCount: labeled.NewCounter("reservation_get_failure_count", "Reservation GetOrExtend failure count", testScope), reservationGetSuccessCount: labeled.NewCounter("reservation_get_success_count", "Reservation GetOrExtend success count", testScope), reservationReleaseFailureCount: labeled.NewCounter("reservation_release_failure_count", "Reservation Release failure count", testScope), @@ -275,7 +275,7 @@ func TestGetOrExtendCatalogReservation(t *testing.T) { catalogSkipCount: labeled.NewCounter("discovery_skip_count", "Task cached skipped in Discovery", testScope), catalogPutSuccessCount: labeled.NewCounter("discovery_put_success_count", "Discovery Put success count", testScope), catalogPutFailureCount: labeled.NewCounter("discovery_put_failure_count", "Discovery Put failure count", testScope), - catalogGetFailureCount: labeled.NewCounter("discovery_get_failure_count", "Discovery Get faillure count", testScope), + catalogGetFailureCount: labeled.NewCounter("discovery_get_failure_count", "Discovery Get failure count", testScope), reservationGetFailureCount: labeled.NewCounter("reservation_get_failure_count", "Reservation GetOrExtend failure count", testScope), reservationGetSuccessCount: labeled.NewCounter("reservation_get_success_count", "Reservation GetOrExtend success count", testScope), reservationReleaseFailureCount: labeled.NewCounter("reservation_release_failure_count", "Reservation Release failure count", testScope), @@ -336,7 +336,7 @@ func TestReleaseCatalogReservation(t *testing.T) { catalogSkipCount: labeled.NewCounter("discovery_skip_count", "Task cached skipped in Discovery", testScope), catalogPutSuccessCount: labeled.NewCounter("discovery_put_success_count", "Discovery Put success count", testScope), catalogPutFailureCount: labeled.NewCounter("discovery_put_failure_count", "Discovery Put failure count", testScope), - catalogGetFailureCount: labeled.NewCounter("discovery_get_failure_count", "Discovery Get faillure count", testScope), + catalogGetFailureCount: labeled.NewCounter("discovery_get_failure_count", "Discovery Get failure count", testScope), reservationGetFailureCount: labeled.NewCounter("reservation_get_failure_count", "Reservation GetOrExtend failure count", testScope), reservationGetSuccessCount: labeled.NewCounter("reservation_get_success_count", "Reservation GetOrExtend success count", testScope), reservationReleaseFailureCount: labeled.NewCounter("reservation_release_failure_count", "Reservation Release failure count", testScope), @@ -410,7 +410,7 @@ func TestWriteCatalogCache(t *testing.T) { catalogSkipCount: labeled.NewCounter("discovery_skip_count", "Task cached skipped in Discovery", testScope), catalogPutSuccessCount: labeled.NewCounter("discovery_put_success_count", "Discovery Put success count", testScope), catalogPutFailureCount: labeled.NewCounter("discovery_put_failure_count", "Discovery Put failure count", testScope), - catalogGetFailureCount: labeled.NewCounter("discovery_get_failure_count", "Discovery Get faillure count", testScope), + catalogGetFailureCount: labeled.NewCounter("discovery_get_failure_count", "Discovery Get failure count", testScope), reservationGetFailureCount: labeled.NewCounter("reservation_get_failure_count", "Reservation GetOrExtend failure count", testScope), reservationGetSuccessCount: labeled.NewCounter("reservation_get_success_count", "Reservation GetOrExtend success count", testScope), reservationReleaseFailureCount: labeled.NewCounter("reservation_release_failure_count", "Reservation Release failure count", testScope), diff --git a/flytepropeller/pkg/controller/nodes/catalog/datacatalog/datacatalog.go b/flytepropeller/pkg/controller/nodes/catalog/datacatalog/datacatalog.go index 3009af038c4..869d0264312 100644 --- a/flytepropeller/pkg/controller/nodes/catalog/datacatalog/datacatalog.go +++ b/flytepropeller/pkg/controller/nodes/catalog/datacatalog/datacatalog.go @@ -360,7 +360,7 @@ func (m *CatalogClient) Update(ctx context.Context, key catalog.Key, reader io.O return catalogStatus, nil } -// GetOrExtendReservation attempts to get a reservation for the cachable task. If you have +// GetOrExtendReservation attempts to get a reservation for the cacheable task. If you have // previously acquired a reservation it will be extended. If another entity holds the reservation // that is returned. func (m *CatalogClient) GetOrExtendReservation(ctx context.Context, key catalog.Key, ownerID string, heartbeatInterval time.Duration) (*datacatalog.Reservation, error) { @@ -400,7 +400,7 @@ func (m *CatalogClient) GetOrExtendReservation(ctx context.Context, key catalog. return response.Reservation, nil } -// ReleaseReservation attempts to release a reservation for a cachable task. If the reservation +// ReleaseReservation attempts to release a reservation for a cacheable task. If the reservation // does not exist (e.x. it never existed or has been acquired by another owner) then this call // still succeeds. func (m *CatalogClient) ReleaseReservation(ctx context.Context, key catalog.Key, ownerID string) error { diff --git a/flytepropeller/pkg/controller/nodes/catalog/datacatalog/transformer.go b/flytepropeller/pkg/controller/nodes/catalog/datacatalog/transformer.go index 33669e3c737..c89b8e8e1c9 100644 --- a/flytepropeller/pkg/controller/nodes/catalog/datacatalog/transformer.go +++ b/flytepropeller/pkg/controller/nodes/catalog/datacatalog/transformer.go @@ -20,7 +20,7 @@ const taskNamespace = "flyte_task" const maxParamHashLength = 8 // Declare the definition of empty literal and variable maps. This is important because we hash against -// the literal and variable maps. So Nil and empty literals and variable maps should translate to these defintions +// the literal and variable maps. So Nil and empty literals and variable maps should translate to these definitions // in order to have a consistent hash. var emptyLiteralMap = core.LiteralMap{Literals: map[string]*core.Literal{}} var emptyVariableMap = core.VariableMap{Variables: map[string]*core.Variable{}} diff --git a/flytepropeller/pkg/controller/nodes/executor.go b/flytepropeller/pkg/controller/nodes/executor.go index 011acd0bb7a..e0bde80dd7b 100644 --- a/flytepropeller/pkg/controller/nodes/executor.go +++ b/flytepropeller/pkg/controller/nodes/executor.go @@ -7,7 +7,7 @@ // // Available node handlers are // - Task: Arguably the most important handler as it handles all tasks. These include all plugins. The goal of the workflow is -// is to run tasks, thus every workflow will contain atleast one TaskNode (except for the case, where the workflow +// is to run tasks, thus every workflow will contain at least one TaskNode (except for the case, where the workflow // is purely a meta-workflow and can run other workflows // - SubWorkflow: This is one of the most important handlers. It can execute Workflows that are nested inside a workflow // - DynamicTask Handler: This is just a decorator on the Task Handler. It handles cases, in which the Task returns a futures @@ -1414,7 +1414,7 @@ func NewExecutor(ctx context.Context, nodeConfig config.NodeConfig, store *stora catalogSkipCount: labeled.NewCounter("discovery_skip_count", "Task cached skipped in Discovery", scope), catalogPutSuccessCount: labeled.NewCounter("discovery_put_success_count", "Discovery Put success count", scope), catalogPutFailureCount: labeled.NewCounter("discovery_put_failure_count", "Discovery Put failure count", scope), - catalogGetFailureCount: labeled.NewCounter("discovery_get_failure_count", "Discovery Get faillure count", scope), + catalogGetFailureCount: labeled.NewCounter("discovery_get_failure_count", "Discovery Get failure count", scope), reservationGetFailureCount: labeled.NewCounter("reservation_get_failure_count", "Reservation GetOrExtend failure count", scope), reservationGetSuccessCount: labeled.NewCounter("reservation_get_success_count", "Reservation GetOrExtend success count", scope), reservationReleaseFailureCount: labeled.NewCounter("reservation_release_failure_count", "Reservation Release failure count", scope), diff --git a/flytepropeller/pkg/controller/nodes/executor_test.go b/flytepropeller/pkg/controller/nodes/executor_test.go index 3a474b39e80..a254e946858 100644 --- a/flytepropeller/pkg/controller/nodes/executor_test.go +++ b/flytepropeller/pkg/controller/nodes/executor_test.go @@ -440,7 +440,7 @@ func TestNodeExecutor_RecursiveNodeHandler_RecurseEndNode(t *testing.T) { }, false}, {"queued->failed", v1alpha1.NodePhaseQueued, v1alpha1.NodePhaseFailed, interfaces.NodePhaseFailed, func() (handler.Transition, error) { - return handler.DoTransition(handler.TransitionTypeEphemeral, handler.PhaseInfoFailure(core.ExecutionError_USER, "code", "mesage", nil)), nil + return handler.DoTransition(handler.TransitionTypeEphemeral, handler.PhaseInfoFailure(core.ExecutionError_USER, "code", "message", nil)), nil }, false}, {"queued->running", v1alpha1.NodePhaseQueued, v1alpha1.NodePhaseRunning, interfaces.NodePhasePending, func() (handler.Transition, error) { diff --git a/flytepropeller/pkg/controller/nodes/node_exec_context.go b/flytepropeller/pkg/controller/nodes/node_exec_context.go index ba43d1ba777..2579fdbe3ef 100644 --- a/flytepropeller/pkg/controller/nodes/node_exec_context.go +++ b/flytepropeller/pkg/controller/nodes/node_exec_context.go @@ -84,7 +84,7 @@ func (e eventRecorder) RecordNodeEvent(ctx context.Context, nodeEvent *event.Nod return nil } logger.Warningf(ctx, "Failed to record nodeEvent, error [%s]", err.Error()) - return nodeerrors.Wrapf(nodeerrors.IllegalStateError, nodeEvent.Id.NodeId, err, "phase mis-match mismatch between propeller and control plane; Trying to record Node p: %s", nodeEvent.Phase) + return nodeerrors.Wrapf(nodeerrors.IllegalStateError, nodeEvent.Id.NodeId, err, "phase mismatch mismatch between propeller and control plane; Trying to record Node p: %s", nodeEvent.Phase) } } return err diff --git a/flytepropeller/pkg/controller/nodes/subworkflow/launchplan/errors.go b/flytepropeller/pkg/controller/nodes/subworkflow/launchplan/errors.go index 54a9b6b9a1a..af4d691a79e 100644 --- a/flytepropeller/pkg/controller/nodes/subworkflow/launchplan/errors.go +++ b/flytepropeller/pkg/controller/nodes/subworkflow/launchplan/errors.go @@ -10,7 +10,7 @@ const ( RemoteErrorAlreadyExists ErrorCode = "AlreadyExists" RemoteErrorNotFound ErrorCode = "NotFound" RemoteErrorSystem ErrorCode = "SystemError" // timeouts, network error etc - RemoteErrorUser ErrorCode = "UserError" // Incase of bad specification, invalid arguments, etc + RemoteErrorUser ErrorCode = "UserError" // In case of bad specification, invalid arguments, etc ) // Checks if the error is of type RemoteError and the ErrorCode is of type RemoteErrorAlreadyExists diff --git a/flytepropeller/pkg/controller/nodes/task/handler.go b/flytepropeller/pkg/controller/nodes/task/handler.go index d2d2107dbaf..600b67076ea 100644 --- a/flytepropeller/pkg/controller/nodes/task/handler.go +++ b/flytepropeller/pkg/controller/nodes/task/handler.go @@ -858,7 +858,7 @@ func New(ctx context.Context, kubeClient executors.Client, client catalog.Client pluginsForType: make(map[pluginCore.TaskType]map[pluginID]pluginCore.Plugin), taskMetricsMap: make(map[MetricKey]*taskMetrics), metrics: &metrics{ - pluginPanics: labeled.NewCounter("plugin_panic", "Task plugin paniced when trying to execute a Handler.", scope), + pluginPanics: labeled.NewCounter("plugin_panic", "Task plugin panicked when trying to execute a Handler.", scope), unsupportedTaskType: labeled.NewCounter("unsupported_tasktype", "No Handler plugin configured for Handler type", scope), pluginExecutionLatency: labeled.NewStopWatch("plugin_exec_latency", "Time taken to invoke plugin for one round", time.Microsecond, scope), pluginQueueLatency: labeled.NewStopWatch("plugin_queue_latency", "Time spent by plugin in queued phase", time.Microsecond, scope), diff --git a/flytepropeller/pkg/controller/nodes/transformers.go b/flytepropeller/pkg/controller/nodes/transformers.go index 6e09c103b42..07253894f8a 100644 --- a/flytepropeller/pkg/controller/nodes/transformers.go +++ b/flytepropeller/pkg/controller/nodes/transformers.go @@ -104,7 +104,7 @@ func ToNodeExecutionEvent(nodeExecID *core.NodeExecutionIdentifier, var nev *event.NodeExecutionEvent // Start node is special case where the Inputs and Outputs are the same and hence here we copy the Output file - // into the OutputResult and in admin we copy it over into input aswell. + // into the OutputResult and in admin we copy it over into input as well. if nodeExecID.NodeId == v1alpha1.StartNodeID { outputsFile := v1alpha1.GetOutputsFile(status.GetOutputDir()) nev = &event.NodeExecutionEvent{ diff --git a/flytepropeller/pkg/controller/workflow/executor.go b/flytepropeller/pkg/controller/workflow/executor.go index 11a76621355..9f86dea139f 100644 --- a/flytepropeller/pkg/controller/workflow/executor.go +++ b/flytepropeller/pkg/controller/workflow/executor.go @@ -337,7 +337,7 @@ func (c *workflowExecutor) TransitionToPhase(ctx context.Context, execID *core.W return nil } if eventsErr.IsEventAlreadyInTerminalStateError(recordingErr) { - // Move to WorkflowPhaseFailed for state mis-match + // Move to WorkflowPhaseFailed for state mismatch msg := fmt.Sprintf("workflow state mismatch between propeller and control plane; Propeller State: %s, ExecutionId %s", wfEvent.Phase.String(), wfEvent.ExecutionId) logger.Warningf(ctx, msg) wStatus.UpdatePhase(v1alpha1.WorkflowPhaseFailed, msg, nil) diff --git a/flytepropeller/pkg/controller/workflowstore/resource_version_caching.go b/flytepropeller/pkg/controller/workflowstore/resource_version_caching.go index d04c65cc82e..234b0721540 100644 --- a/flytepropeller/pkg/controller/workflowstore/resource_version_caching.go +++ b/flytepropeller/pkg/controller/workflowstore/resource_version_caching.go @@ -18,7 +18,7 @@ type resourceVersionMetrics struct { workflowRedundantUpdatesCount labeled.Counter } -// Simple function that covnerts the namespace and name to a string +// Simple function that converts the namespace and name to a string func resourceVersionKey(namespace, name string) string { return fmt.Sprintf("%s/%s", namespace, name) } diff --git a/flytepropeller/pkg/webhook/vault_secret_manager_test.go b/flytepropeller/pkg/webhook/vault_secret_manager_test.go index 52c88c196b4..8c048a258fb 100644 --- a/flytepropeller/pkg/webhook/vault_secret_manager_test.go +++ b/flytepropeller/pkg/webhook/vault_secret_manager_test.go @@ -252,7 +252,7 @@ func TestVaultSecretManagerInjector_Inject(t *testing.T) { wantErr: false, }, { - name: "DB Secret backend enginge is supported", + name: "DB Secret backend engine is supported", args: args{ cfg: config.VaultSecretManagerConfig{Role: "flyte", KVVersion: config.KVVersion1}, secret: &coreIdl.Secret{ diff --git a/flytepropeller/script/fold-logs.py b/flytepropeller/script/fold-logs.py index 05a551ca27a..54ff80a6d3e 100755 --- a/flytepropeller/script/fold-logs.py +++ b/flytepropeller/script/fold-logs.py @@ -9,7 +9,7 @@ import re import sys -header = """Timestamp Line Duration Heirarchical Log Layout +header = """Timestamp Line Duration Hierarchical Log Layout ----------------------------------------------------------------------------------------------------""" printfmt = "%-11s %-7d %-11s %s" diff --git a/flytestdlib/Makefile b/flytestdlib/Makefile index 0b5dae5a709..f5883b6bca6 100644 --- a/flytestdlib/Makefile +++ b/flytestdlib/Makefile @@ -18,7 +18,7 @@ generate: @echo "************************ go generate **********************************" go generate ./... -# This is the only target that should be overriden by the project. Get your binary into ${GOREPO}/bin +# This is the only target that should be overridden by the project. Get your binary into ${GOREPO}/bin .PHONY: compile compile: mkdir -p ./bin diff --git a/flytestdlib/boilerplate/flyte/golang_test_targets/go-gen.sh b/flytestdlib/boilerplate/flyte/golang_test_targets/go-gen.sh index 54bd6af61b1..5ac17fa40ae 100755 --- a/flytestdlib/boilerplate/flyte/golang_test_targets/go-gen.sh +++ b/flytestdlib/boilerplate/flyte/golang_test_targets/go-gen.sh @@ -9,7 +9,7 @@ go generate ./... if [ -n "$DELTA_CHECK" ]; then DIRTY=$(git status --porcelain) if [ -n "$DIRTY" ]; then - echo "FAILED: Go code updated without commiting generated code." + echo "FAILED: Go code updated without committing generated code." echo "Ensure make generate has run and all changes are committed." DIFF=$(git diff) echo "diff detected: $DIFF" diff --git a/flytestdlib/cli/pflags/readme.rst b/flytestdlib/cli/pflags/readme.rst index 8a47d921f83..84d6d105d8a 100644 --- a/flytestdlib/cli/pflags/readme.rst +++ b/flytestdlib/cli/pflags/readme.rst @@ -2,7 +2,7 @@ Pflags Generator ================ -This tool enables you to generate code to add pflags for all fields in a struct (recursively). In conjunction with the config package, this can be useful to generate cli flags that overrides configs while maintaing type safety and not having to deal with string typos. +This tool enables you to generate code to add pflags for all fields in a struct (recursively). In conjunction with the config package, this can be useful to generate cli flags that overrides configs while maintaining type safety and not having to deal with string typos. Getting Started ^^^^^^^^^^^^^^^ diff --git a/flytestdlib/config/config_cmd.go b/flytestdlib/config/config_cmd.go index 97db03f9588..8e16faf3f34 100644 --- a/flytestdlib/config/config_cmd.go +++ b/flytestdlib/config/config_cmd.go @@ -57,7 +57,7 @@ func NewConfigCommand(accessorProvider AccessorProvider) *cobra.Command { docsCmd := &cobra.Command{ Use: "docs", - Short: "Generate configuration documetation in rst format", + Short: "Generate configuration documentation in rst format", RunE: func(cmd *cobra.Command, args []string) error { sections := GetRootSection().GetSections() orderedSectionKeys := sets.NewString() diff --git a/flytestdlib/contextutils/context.go b/flytestdlib/contextutils/context.go index 797d9a8089d..080b0a098c9 100644 --- a/flytestdlib/contextutils/context.go +++ b/flytestdlib/contextutils/context.go @@ -166,7 +166,7 @@ func addStringFieldWithDefaults(ctx context.Context, m map[string]string, fieldK m[fieldKey.String()] = val.(string) } -// GetLogFields gets a map of all known logKeys set on the context. logKeys are special and should be used incase, +// GetLogFields gets a map of all known logKeys set on the context. logKeys are special and should be used in case, // context fields are to be added to the log lines. func GetLogFields(ctx context.Context) map[string]interface{} { res := map[string]interface{}{} diff --git a/rfc/README.md b/rfc/README.md index 7a9d8f95299..0490072aa03 100644 --- a/rfc/README.md +++ b/rfc/README.md @@ -35,7 +35,7 @@ Some good practices when starting an RFC include: - If there's no objection (silence is approval) then create an Issue from the Discussion ([see how](https://docs.github.com/en/issues/tracking-your-work-with-issues/creating-an-issue#creating-an-issue-from-discussion)) - Proceed to [open a PR](#opening-a-pull-request) - Discussing the topic on the [#contribute](https://flyte-org.slack.com/archives/C04NJPLRWUX) Slack channel -- Adding the topic to the Contributor's [meeting agenda](https://hackmd.io/@davidmirror/rkqCpbK1n) to make sure it alligns with roadmap +- Adding the topic to the Contributor's [meeting agenda](https://hackmd.io/@davidmirror/rkqCpbK1n) to make sure it aligns with roadmap - Taking the time to produce a well-written, well-thought-of document by using the template located [here](https://github.com/flyteorg/flyte/blob/RFC-Process/rfc/RFC-0000-Template.md). ## Where to store RFCs @@ -44,7 +44,7 @@ The Flyte repo on GitHub has an RFC folder with 3 directories: - **Core language:** proposals to `FlyteIdl` that change the wire-format in any way are considered significant changes that require revision and approval. Reviewers: At least one of [Flyte maintainers](https://github.com/flyteorg/community/blob/main/MAINTAINERS.md) and one of [Technical Steering Committee](https://github.com/flyteorg/community/blob/main/MAINTAINERS.md). -- **General System:** Changes to other repos that introduce signficant change of behavior or user-impacting features. +- **General System:** Changes to other repos that introduce significant change of behavior or user-impacting features. Reviewers: At least one of [Flyte maintainers](https://github.com/flyteorg/community/blob/main/MAINTAINERS.md) and one of [Technical Steering Committee](https://github.com/flyteorg/community/blob/main/MAINTAINERS.md). - **CI-CD:** Significant changes to CI-CD System that have impact across different repositories. @@ -63,7 +63,7 @@ The Flyte repo on GitHub has an RFC folder with 3 directories: Once a pull request is opened, the RFC is now in development and the following will happen: -* It will be introduced in a future Contributor's meetup, happenning every other week, except othwerwise informed. +* It will be introduced in a future Contributor's meetup, happening every other week, except otherwise informed. * The proposal will be discussed as much as possible in the RFC pull request directly. Any outside discussion will be summarized in the comment thread. * When deemed "ready", a maintainer or TSC member will propose a "motion for Final Comment Period (FCP)" along with a disposition of the outcome (merge, close, or postpone). This step is taken when enough discussions of the tradeoffs have taken place and the community is in a position to make a decision. * The proposal enters FCP unless there's any objection (lazy consensus). @@ -75,7 +75,7 @@ Once a pull request is opened, the RFC is now in development and the following w A proposal is considered Accepted when it has: * Completed the FCP with no significant objections -* Received an approval vote from a supermajority (2/3) of the [Technical Steering Comittee](https://github.com/flyteorg/community/blob/main/MAINTAINERS.md)'s members +* Received an approval vote from a supermajority (2/3) of the [Technical Steering Committee](https://github.com/flyteorg/community/blob/main/MAINTAINERS.md)'s members ## Implementing RFCs diff --git a/rfc/core language/1461-cache-serialize-api.md b/rfc/core language/1461-cache-serialize-api.md index f245fd177fc..6788d994967 100644 --- a/rfc/core language/1461-cache-serialize-api.md +++ b/rfc/core language/1461-cache-serialize-api.md @@ -6,18 +6,18 @@ ## 1 Executive Summary -The cache serialize API provides a lease-based reservation system for cachable tasks in Flyte. The purpose is to mitigate simultaneous evaluations of cachable tasks over identical inputs, resulting in duplication of work and therefore inefficient resource utilization. The proposed approach will more effectively process workflows with potentially significant improvements in end-to-end workflow evaluation times for instances with long running cachable tasks. +The cache serialize API provides a lease-based reservation system for cacheable tasks in Flyte. The purpose is to mitigate simultaneous evaluations of cacheable tasks over identical inputs, resulting in duplication of work and therefore inefficient resource utilization. The proposed approach will more effectively process workflows with potentially significant improvements in end-to-end workflow evaluation times for instances with long running cacheable tasks. ## 2 Motivation -Currently, Flyte initializes cachable tasks with a lookup the the datacatalog cache. If a previous instance of the task (ie. identical version and inputs) has completed the cached values are used, otherwise the task is executed. +Currently, Flyte initializes cacheable tasks with a lookup the the datacatalog cache. If a previous instance of the task (ie. identical version and inputs) has completed the cached values are used, otherwise the task is executed. -The issue is that disparate workflows, or unique executions of the same workflow, may execute an identical cachable task before a previous has completed. This results in multiple instances of the same task execution being performed simultaneously. For example, two workflows, namely A and B, contain the same long running (ex. 2 time units) identical cachable task. Workflow A executes the task beginning at t0 (finishing at t2) and workflow B executes the task at t1 (finishing at t3). The inefficiencies are twofold: +The issue is that disparate workflows, or unique executions of the same workflow, may execute an identical cacheable task before a previous has completed. This results in multiple instances of the same task execution being performed simultaneously. For example, two workflows, namely A and B, contain the same long running (ex. 2 time units) identical cacheable task. Workflow A executes the task beginning at t0 (finishing at t2) and workflow B executes the task at t1 (finishing at t3). The inefficiencies are twofold: 1. From t1 (workflow B task execution) to t2 (workflow A task completion) there are two instances of the same task performing identical work (albeit at different stages). 2. The execution from workflow B will not complete until t3, whereas it could use the cached results from workflow A at t2 to complete faster. -The proposed solution will mitigate unnecessary resource utilization by disallowing duplicate task executions and provide more efficient workflow processing by using all available cachable task results. +The proposed solution will mitigate unnecessary resource utilization by disallowing duplicate task executions and provide more efficient workflow processing by using all available cacheable task results. ## Proposed Implementation @@ -46,12 +46,12 @@ We propose introducing additional gRPC calls into `flyteidl` to support acquisit - GetOrExtendReservation: Attempt to get a reservation for the corresponding artifact. If one already exists and you are the owner, instead extend it. If one already exists and you are not the owner, return that reservation (containing ownership, expiration, etc) to be displayed user-facing. - ReleaseReservation: Release an active artifact reservation. -Reservation requests will include a requested heartbeat-interval-seconds configuration option. This is typically set to FlytePropeller workflow re-evaluation duration (as explored below). This value is used to determine reservation expiration dates, where a clock-skew algorithm is used to minimize the chance that an active reservation expires before being exended. It should be noted that the datacatalog configuration may define a maximum value to curb unnecessarily large heartbeat interval requests. +Reservation requests will include a requested heartbeat-interval-seconds configuration option. This is typically set to FlytePropeller workflow re-evaluation duration (as explored below). This value is used to determine reservation expiration dates, where a clock-skew algorithm is used to minimize the chance that an active reservation expires before being extended. It should be noted that the datacatalog configuration may define a maximum value to curb unnecessarily large heartbeat interval requests. ### Datacatalog Managed Reservations The `datacatalog` service will be responsible for managing cache reservations. This will entail the addition of a new ReservationManager and ReservationRepo (with gorm implementation) per the project standards. Additionally it requires a new table in the db where reservations are uniquely defined based on DatasetID and an artifact tag. -All database operations are performed with write consistency, where records are only inserted or updated on restrictive conditions. This eliminates the possibility for race conditions. Where two executions attempt to aquire a cache reservation simultaneously, only one can succeeed. +All database operations are performed with write consistency, where records are only inserted or updated on restrictive conditions. This eliminates the possibility for race conditions. Where two executions attempt to acquire a cache reservation simultaneously, only one can succeeed. Additionally, the `datacatalog` configuration file defines max-heartbeat-interval-seconds and heartbeat-grace-period-multiplier to define the maximum heartbeat interval of reservation extensions and set the reservation expiration (computed as heartbeat-interval-seconds * heartbeat-grace-period-multiplier). @@ -90,7 +90,7 @@ We introduce a new task phase, namely WaitingForCache, which denotes tasks that ## 5 Drawbacks -The advantages / disadvantages may not be clear to users. Intuitively, this feature may be viewed as a replacement of the existing cache mechanism, where all tasks may benefit. It needs to be explicitely stated this is not the case. It is intended as a extension which improves performance for long-running cachable tasks. +The advantages / disadvantages may not be clear to users. Intuitively, this feature may be viewed as a replacement of the existing cache mechanism, where all tasks may benefit. It needs to be explicitly stated this is not the case. It is intended as a extension which improves performance for long-running cacheable tasks. ## 6 Alternatives @@ -98,7 +98,7 @@ A reservation management system is the only obvious solution to enable different ## 7 Potential Impact and Dependencies -This solution introduces a significant increase in datacatalog gRPC communication. Tasks that are in the WaitingForCache phase use separate calls to check the cache status reservation status during each re-evaluation loop. If this loop is frequent (ex. 3 seconds) and the number of waiting tasks is large then datacatalog traffic may negatively effect available bandwith. To combat this we ensure the size of cache and reservation check request / response combinations is relatively small. However, we may need to introduce additional logic where these checks are only performed every N workflow evaluations rather then every one. +This solution introduces a significant increase in datacatalog gRPC communication. Tasks that are in the WaitingForCache phase use separate calls to check the cache status reservation status during each re-evaluation loop. If this loop is frequent (ex. 3 seconds) and the number of waiting tasks is large then datacatalog traffic may negatively effect available bandwidth. To combat this we ensure the size of cache and reservation check request / response combinations is relatively small. However, we may need to introduce additional logic where these checks are only performed every N workflow evaluations rather then every one. ## 8 Unresolved questions @@ -106,4 +106,4 @@ This solution introduces a significant increase in datacatalog gRPC communicatio ## 9 Conclusion -This solution for evaluating cache operations in serial by reserving artifacts will mitigate unecessary resource utilization for cachable, long running tasks. It is designed for scale to cope with large deployments and effectively manages reservation management including reservation expirations and race conditions during acquisition. It has the potential for significant performance improvements in disparate workflows, or sequential executions of the same workflow, where an expensive, cachable task is continuously executed. +This solution for evaluating cache operations in serial by reserving artifacts will mitigate unnecessary resource utilization for cacheable, long running tasks. It is designed for scale to cope with large deployments and effectively manages reservation management including reservation expirations and race conditions during acquisition. It has the potential for significant performance improvements in disparate workflows, or sequential executions of the same workflow, where an expensive, cacheable task is continuously executed. diff --git a/rfc/core language/sum-types-2.md b/rfc/core language/sum-types-2.md index f9e40cab52f..e85eed9106b 100644 --- a/rfc/core language/sum-types-2.md +++ b/rfc/core language/sum-types-2.md @@ -4,7 +4,7 @@ # Executive Summary -Some questions on the previously proposed implementations made it clear that a deeper investigation into possible alternatives was required. I consider 3 programming languages with fundamentally different union type implementations in the core language or the standard library. I develop a new version of the sum type IDL representation that accomodates all three languages. +Some questions on the previously proposed implementations made it clear that a deeper investigation into possible alternatives was required. I consider 3 programming languages with fundamentally different union type implementations in the core language or the standard library. I develop a new version of the sum type IDL representation that accommodates all three languages. # Examples in Programming Languages diff --git a/rfc/core language/sum-types.md b/rfc/core language/sum-types.md index e56f2d908e6..2c8f1f72954 100644 --- a/rfc/core language/sum-types.md +++ b/rfc/core language/sum-types.md @@ -114,7 +114,7 @@ Currently any type can take none values ([see this comment in Propeller's source // ... } ``` - - TODO: It might be necessary to accumulate the errors for each of the summands' failed binding validations to ease debugging. If that is the case, it would be preferrable to ignore errors by default and re-run the verification if no candidate was found to avoid slowing down the non-exceptional case + - TODO: It might be necessary to accumulate the errors for each of the summands' failed binding validations to ease debugging. If that is the case, it would be preferable to ignore errors by default and re-run the verification if no candidate was found to avoid slowing down the non-exceptional case - The verbosity of the resulting messages would make it very hard to read so only a broad error is collected right now. It is unclear whether the extra complexity in the code and in the output is justified - Implement a `typing.Union` type transformer in Python FlyteKit: - `get_literal_type`: diff --git a/rfc/system/0000-runtime-workflow-control-using-signalling.md b/rfc/system/0000-runtime-workflow-control-using-signalling.md index c5fa30ca2ef..163bfdbac41 100644 --- a/rfc/system/0000-runtime-workflow-control-using-signalling.md +++ b/rfc/system/0000-runtime-workflow-control-using-signalling.md @@ -143,4 +143,4 @@ Without a notification system excessive signalling requirements may result in fr ## 9 Conclusion -In this docuemnt we have proposed a solution to provide dynamic, runtime control of workflows using signals and gate nodes. The variety of use-cases extend the application of Flyte to better satisfy the needs of our community. The front-end approaches outline potential APIs to clearly promote this functionality and the backend architecture can scale to cope with integration into large deployments. +In this document we have proposed a solution to provide dynamic, runtime control of workflows using signals and gate nodes. The variety of use-cases extend the application of Flyte to better satisfy the needs of our community. The front-end approaches outline potential APIs to clearly promote this functionality and the backend architecture can scale to cope with integration into large deployments. diff --git a/rfc/system/0007-community-groups.md b/rfc/system/0007-community-groups.md index b4bf3f27f98..2a41a1185b3 100644 --- a/rfc/system/0007-community-groups.md +++ b/rfc/system/0007-community-groups.md @@ -100,7 +100,7 @@ Working Groups will be disbanded if either of the following is true: ## 4. Drawbacks -At the beginning, it could cause some confusion especially around SIG/WG ownership on repositories, but once those commmunity groups are formed, eventually most (ideally all) subcomponents of the project will be owned by a group. +At the beginning, it could cause some confusion especially around SIG/WG ownership on repositories, but once those community groups are formed, eventually most (ideally all) subcomponents of the project will be owned by a group. ## 5. Alternatives diff --git a/rfc/system/1476-task-resources.md b/rfc/system/1476-task-resources.md index d4918b20be8..8889b53752f 100644 --- a/rfc/system/1476-task-resources.md +++ b/rfc/system/1476-task-resources.md @@ -10,7 +10,7 @@ Task resource allocation in Flyte includes the process of setting *CPU, memory, ## 2 Motivation -As the system control plane, flyteadmin is the authoritative store for defaults and per-project (and other) overrides. However, the process of converting a user-defined task to a Kubernetes object is handled by appropriate plugins. Therefore, it makes sense to have task resource resolution occur at execution time while leveraging admin to store and propage task default values. +As the system control plane, flyteadmin is the authoritative store for defaults and per-project (and other) overrides. However, the process of converting a user-defined task to a Kubernetes object is handled by appropriate plugins. Therefore, it makes sense to have task resource resolution occur at execution time while leveraging admin to store and propagate task default values. ## 3 Proposed Implementation @@ -18,7 +18,7 @@ Background ---------- Kubernetes allows users to specify both [requests and limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/). **Requests** are used to schedule pods onto nodes. **Limits** are hard stops that running containers are not permitted to exceed. -In the context of what a Flyte user can specify, flytekit [task decorators](https://docs.flyte.org/projects/flytekit/en/latest/generated/flytekit.task.html#flytekit-task) permit setting both requests and limits. Furthermore, in their workflow definitions, users can specify node-level overrides which supercede static task definition resource values. +In the context of what a Flyte user can specify, flytekit [task decorators](https://docs.flyte.org/projects/flytekit/en/latest/generated/flytekit.task.html#flytekit-task) permit setting both requests and limits. Furthermore, in their workflow definitions, users can specify node-level overrides which supersede static task definition resource values. In the Flyte back-end, **default** values can be applied as requests and limits when a user omits them from a task specification. Furthermore, **max** values are used to enforce that either user-specified resource requests or limits do not exceed a configured threshold. diff --git a/rfc/system/1483-flytepropeller-horizontal-scaling.md b/rfc/system/1483-flytepropeller-horizontal-scaling.md index 70cd8291b87..70e9f3c0a1d 100644 --- a/rfc/system/1483-flytepropeller-horizontal-scaling.md +++ b/rfc/system/1483-flytepropeller-horizontal-scaling.md @@ -9,11 +9,11 @@ This Document - https://hackmd.io/uVT5Q9zNS8SNMkEhs5pfqQ ## 1 Executive Summary -We propose an approach to facilitate automatic, horizontal scaling of FlytePropeller. This is important to combat increases in indivdual workflow execution latency as the number of concurrent workflow executions increases. The solution includes a new FlytePropeller Manager component which handles automatic scaling of additional FlytePropeller instances. Additionally, we rely on a configurable sharding mechanism to ensure deterministic, decentralized coordination of Flyte workflows. +We propose an approach to facilitate automatic, horizontal scaling of FlytePropeller. This is important to combat increases in individual workflow execution latency as the number of concurrent workflow executions increases. The solution includes a new FlytePropeller Manager component which handles automatic scaling of additional FlytePropeller instances. Additionally, we rely on a configurable sharding mechanism to ensure deterministic, decentralized coordination of Flyte workflows. ## 2 Motivation -FlytePropeller is the engine that drives Flyte workflows. It is highly optimized and a single instance can run thousands of concurrent workflows. Internally, workflows are processed in a control loop which operates over a work queue. Consequently, as the number of concurrent workflows increases beyond a resonable threshold, the latency of workflow completion will experience marginal increases as well. +FlytePropeller is the engine that drives Flyte workflows. It is highly optimized and a single instance can run thousands of concurrent workflows. Internally, workflows are processed in a control loop which operates over a work queue. Consequently, as the number of concurrent workflows increases beyond a reasonable threshold, the latency of workflow completion will experience marginal increases as well. The default FlytePropeller deployment strategy is a single instance and while it is possible to launch multiple FlytePropeller instances (ie. one per namespace) this has to be done manually. This approach has obvious limits to scalability. Therefore, to reduce workflow processing latency as the number of workflows increase, we need a new solution to increase the refresh rate per workflow. @@ -41,7 +41,7 @@ ShardingStrategy: Strategy: ``` -Depending on the Sharding strategy the propeller-manager, launches one or more FlytePropeller instances. Ideally the FlytePropeller instances are identical to the propeller-manager (ie. k8s pod configuration) with minor changes that affect how the sharding works. The configuration will inlcude FlytePropeller image definition, serviceAccounts, etc. +Depending on the Sharding strategy the propeller-manager, launches one or more FlytePropeller instances. Ideally the FlytePropeller instances are identical to the propeller-manager (ie. k8s pod configuration) with minor changes that affect how the sharding works. The configuration will include FlytePropeller image definition, serviceAccounts, etc. ```yaml= ConstantShardingStrategy: diff --git a/rfc/system/2633-eviction-of-cached-task-outputs.md b/rfc/system/2633-eviction-of-cached-task-outputs.md index 1bca582755f..b60fd56eff0 100644 --- a/rfc/system/2633-eviction-of-cached-task-outputs.md +++ b/rfc/system/2633-eviction-of-cached-task-outputs.md @@ -62,7 +62,7 @@ The protobuf definitions in `flyteidl` need to include a new (optional) paramete #### `flyteadmin` -`flyteadmin` needs to accept an updated config for execution requests to support the eviction override as well as pass it along to `flytepropeller`. The new flag should be available via API to either be invoked programatically (e.g. via a user script) or via `flyteconsole`. +`flyteadmin` needs to accept an updated config for execution requests to support the eviction override as well as pass it along to `flytepropeller`. The new flag should be available via API to either be invoked programmatically (e.g. via a user script) or via `flyteconsole`. #### `flyteconsole` @@ -136,7 +136,7 @@ The proposed cache eviction changes introduces a slight overhead in execution pr Depending on the size of the cached outputs and the blob storage speed, this might induce increased scheduling times for tasks, although no performance impact should occur if no cache eviction is performed. In order to minimize the execution startup delay, cache eviction could be postponed until the task executed successfully, only requiring a quick check beforehand and ensuring the actual computation can start as soon as possible. -We do not anticipate any noticable impact by the API extension for `flyteadmin` during regular executions, however evicting all cached outputs of a large workflow could introduce some strain on `flyteadmin`/`datacatalog` during processing. +We do not anticipate any noticeable impact by the API extension for `flyteadmin` during regular executions, however evicting all cached outputs of a large workflow could introduce some strain on `flyteadmin`/`datacatalog` during processing. ## 6 Alternatives @@ -146,13 +146,13 @@ At the moment, no direct alternative exists to allow for partial re-processing o As this change spans across most of Flyte's components and repositories, a coordinated effort to introduce and test this proposal will be required. Since we rely on an API change, a `flyteidl` release will be required before additional components can be adapted. -The potential for malicious exploitation is deemed non-existant as no access to any data is provided and most additional communication will be performed intra-cluster. An attacker could potentially cause additional computational costs by removing cached outputs. +The potential for malicious exploitation is deemed non-existent as no access to any data is provided and most additional communication will be performed intra-cluster. An attacker could potentially cause additional computational costs by removing cached outputs. ## 8 Unresolved questions 1. When should cache eviction happen during a repeated execution? Should we evict the cache entry immediately before starting task execution, potentially leaving no cached output available if the execution fails, or postpone the removal until the task has finished (successfully) and new data can be stored right away? - - **RESOLVED**: eviction will happen after the successfull execution of a task, overwriting the existing data. + - **RESOLVED**: eviction will happen after the successful execution of a task, overwriting the existing data. 2. Should the `cache_override` flag also be added to `flytekit`'s task decorator? This would allow users to define tasks/workflows which will automatically evict their cached results, however does not strictly fit with the actual task "definition". 3. Which Flyte tools (`flyteconsole`/`flytectl`) should support the proposed `AdminService` API extension for `flyteadmin`, if any? diff --git a/rfc/system/2995-performance-benchmarking.md b/rfc/system/2995-performance-benchmarking.md index daa78e9d72c..8f40dfbc4da 100644 --- a/rfc/system/2995-performance-benchmarking.md +++ b/rfc/system/2995-performance-benchmarking.md @@ -91,7 +91,7 @@ Fortunately the base [Flyte repository](https://github.com/flyteorg/flyte) conta - Short Running Tasks: Flyte excels at abstracting the costs associated with cloud execution and amortizing them over parallel execution of complex tasks. In scenarios where tasks are short-lived, Flyte overhead will be relatively large and can be responsible for large amount of workflow runtime. It is important to better understand these limitations to help Flyte adapt and support performant short running operations. - Chained Cache Hits: Cache hits should be zero-cost but operationally require network I/O, database lookups, and blobstore read / writes. These contribute to unintuitive performance and should be minimized. - Large Workflows: FlytePropeller is designed as a k8s operator and consequently uses a k8s CRD to track workflow status. Within k8s, CRDs are stored in etcd, a KV store in which performance degradations are well documented as the value sizes increase. Understanding the implications of CRD size compared to workflow runtimes can help inform logical workflow structure. -- Large Fan-Out Map Tasks: Map tasks are designed to perform a single operation on a large collection of homogeneous data. Their implementation within Flyte goes further than syntatic sugar; rather, they reduce metadata maintenance to support larger scale than dynamic tasks, etc. To support increasing scales it is important to mitigate issues in executing over large fan outs. +- Large Fan-Out Map Tasks: Map tasks are designed to perform a single operation on a large collection of homogeneous data. Their implementation within Flyte goes further than syntactic sugar; rather, they reduce metadata maintenance to support larger scale than dynamic tasks, etc. To support increasing scales it is important to mitigate issues in executing over large fan outs. - Nested Dynamic Workflows and Launchplans: Dynamics require k8s to execute a Pod which compiles a Flyte DAG that is subsequently execution. Similarly, launchplans spawn a separate CRD (and FlyteWorkflow) which is then tracked between workflows. Both of these features are very powerful in supporting developmental use-cases - however this does come at a cost. - Parallelized Subworkflows: Many users logically partition code-bases into workflows which are then used as subworkflows. Within each of these calls, Flyte internally appends a "start" and "end" node to anchor these in the parent workflow which incurs additional costs in data movements, etc. @@ -173,12 +173,12 @@ The considerations for this proposal are rooted in esoteric knowledge of the Fly ## 6 Alternatives -Currently, Flyte emits a collection of metrics through prometheus. This is very powerful in defining SLOs and macro tracking a Flyte deployment, but it is very cumbersome to analyze performance of an individual workflow. This is because prometheus metrics (1) are meant to have bounded label values, so using workflow id results in massive memory utilization and (2) values are reported in quantiles, so individual values are lost and a time-series based analysis is impossible. For these reasons, using existing prometheus metrics to inform performance benchmarking will result in inaccurate and unprecise results. +Currently, Flyte emits a collection of metrics through prometheus. This is very powerful in defining SLOs and macro tracking a Flyte deployment, but it is very cumbersome to analyze performance of an individual workflow. This is because prometheus metrics (1) are meant to have bounded label values, so using workflow id results in massive memory utilization and (2) values are reported in quantiles, so individual values are lost and a time-series based analysis is impossible. For these reasons, using existing prometheus metrics to inform performance benchmarking will result in inaccurate and imprecise results. The [FlytePropeller repository](https://github.com/flyteorg/flytepropeller) contains a script called [fold_logs.py](https://github.com/flyteorg/flytepropeller/blob/master/script/fold-logs.py). This script parses FlytePropeller logs and outputs a hierarchical time-series breakdown of Flytes management of an individual workflow. This output is probably very close to the telemetry data we expect to produce. However, this is based on parsing log messages which ensures that the results will be inaccurate and it is difficult to quantify operations unless specific "start X" and "stop X" logs are recorded. An example output of this script is provided below: hamersaw@ragnarok:~/development/flytepropeller$ ./script/fold-logs.py ~/flyte.log fd5d4ee88f9dc4436a76 - Timestamp Line Duration Heirarchical Log Layout + Timestamp Line Duration Hierarchical Log Layout ---------------------------------------------------------------------------------------------------- 18:08:38 282 14.0s 1 Workflow 18:08:38 283 0.0s 1.1 Processing diff --git a/rfc/system/3346-array-node.md b/rfc/system/3346-array-node.md index 3f3d4a39c68..754c222ed98 100644 --- a/rfc/system/3346-array-node.md +++ b/rfc/system/3346-array-node.md @@ -82,7 +82,7 @@ If this is implemented correctly, the performance should be very similar to exis ## 5 Drawbacks -The only currently known drawback of this proposal is adding cache lookups for subtasks as a synchronous call. The existing implementation, implementing as a backend plugin, uses a separate service to batch cache lookups that are performed asynchronously. This is marginally more efficient but does introduce it's own problems in complexity. Additionally, while the performance of cache lookups may be slightly degraded, the introduction of ArrayNodes means that cacheing will be functionality complete, specifically cache serialization and cache overwriting will be supported over `map_task` subtasks. +The only currently known drawback of this proposal is adding cache lookups for subtasks as a synchronous call. The existing implementation, implementing as a backend plugin, uses a separate service to batch cache lookups that are performed asynchronously. This is marginally more efficient but does introduce it's own problems in complexity. Additionally, while the performance of cache lookups may be slightly degraded, the introduction of ArrayNodes means that caching will be functionality complete, specifically cache serialization and cache overwriting will be supported over `map_task` subtasks. ## 6 Alternatives diff --git a/rfc/system/3553-config-override.md b/rfc/system/3553-config-override.md index 044fd468ca5..81c275ae73f 100644 --- a/rfc/system/3553-config-override.md +++ b/rfc/system/3553-config-override.md @@ -10,7 +10,7 @@ Using `with_overrides` can provide node-level override for workflow, but this is ## Design -By default, all task nodes are not overridable at runtime. However, users can call `.with_runtime_override(name: str)` on task nodes to register a "hook" on tasks, which allows the task to be indentified by `name` and overridden with new config at runtime. +By default, all task nodes are not overridable at runtime. However, users can call `.with_runtime_override(name: str)` on task nodes to register a "hook" on tasks, which allows the task to be identified by `name` and overridden with new config at runtime. One main motivation for introducing such a "hook mechanism" based on identifiers/names is that we couldn't come up with a good answer how a good UX can be achieved when having to specify overrides in a nested workflow graph in the UI, in code, or via the CLI since all approaches would require replicating the workflow graph structure in the overrides config. This way, the overrides can be specified in a simple map and the structure of the workflow graph does not matter/does not have to be shown. @@ -26,8 +26,8 @@ def t2(): @workflow def wf(): t1() # this task node cannot be overridden - t1().with_runtime_override("task-yee") # can be overriden under the name "task-yee" - t2().with_runtime_override("task-ketan") # can be overriden under the name "task-ketan" + t1().with_runtime_override("task-yee") # can be overridden under the name "task-yee" + t2().with_runtime_override("task-ketan") # can be overridden under the name "task-ketan" t3() # this task node cannot be overridden ``` diff --git a/rfc/system/3749-sane-overridable-defaults.md b/rfc/system/3749-sane-overridable-defaults.md index 0c4fdedb0f9..eff22063242 100644 --- a/rfc/system/3749-sane-overridable-defaults.md +++ b/rfc/system/3749-sane-overridable-defaults.md @@ -4,7 +4,7 @@ Flyte comes with a rich set of overridable defaults ([matchable resources](https These can be configured to have default levels across a Flyte installation, on a per-domain basis, on a per-project basis and even at a per-workflow basis. -Managing these requires setting the defaults at a level of specifity using a variety of [message types](https://github.com/flyteorg/flyteidl/blob/master/protos/flyteidl/admin/matchable_resource.proto#L14,L38). Furthermore it's difficult to holistically reason about what overrides exist at say, the project level. Updating these overrides is error-prone because there is no existing merge logic and requires understanding the data model to even know what specific matchable resource to specifically update. +Managing these requires setting the defaults at a level of specificity using a variety of [message types](https://github.com/flyteorg/flyteidl/blob/master/protos/flyteidl/admin/matchable_resource.proto#L14,L38). Furthermore it's difficult to holistically reason about what overrides exist at say, the project level. Updating these overrides is error-prone because there is no existing merge logic and requires understanding the data model to even know what specific matchable resource to specifically update. Importantly, the current mechanism for applying application overrides is driven imperatively using flytectl and hard to reason about across revisions or manage using [Infrastructure as Code](https://learn.microsoft.com/en-us/devops/deliver/what-is-infrastructure-as-code). @@ -130,7 +130,7 @@ In FlyteAdmin config ``` flyteadmin: - overrrides: + overrides: sourceType: configmap sourcePath: /etc/flyte/overrides/ @@ -174,7 +174,7 @@ metadata: ``` This can work the same way cluster resource templates are mounted as ConfigMaps in the FlyteAdmin deployment but are [fully customizable](https://github.com/flyteorg/flyte/blob/5ab8cb887a5a7051070cb93fca603ed1f22f2f74/charts/flyte-core/values.yaml#L752-L810). -This does require restarting the FlyteAdmin deployment for invididual overrides changes but allows for declaratively defining the complete set of overrides in a reproducible manner. See more discusison on the trade-offs [here](https://github.com/flyteorg/flyte/discussions/3731#discussioncomment-6053743). +This does require restarting the FlyteAdmin deployment for individual overrides changes but allows for declaratively defining the complete set of overrides in a reproducible manner. See more discusison on the trade-offs [here](https://github.com/flyteorg/flyte/discussions/3731#discussioncomment-6053743). #### FlyteAdmin changes diff --git a/rfc/system/RFC-0000-console-ui-upgrade.md b/rfc/system/RFC-0000-console-ui-upgrade.md index 4294bd5bd59..e97b5b91b4f 100644 --- a/rfc/system/RFC-0000-console-ui-upgrade.md +++ b/rfc/system/RFC-0000-console-ui-upgrade.md @@ -25,7 +25,7 @@ This proposal includes 2 distinct and independent sets of changes. They are boun The first change is to move the application header that contains the branding and user identity elements from the top of the application window to the left rail. Doing this brings the appearance of the Flyte console more inline with the _de facto_ standard for developer console applications and also frees up ~75px of vertical real estate. The following illustrates this change. -![comparison of new proposed changes with exisiting](../img/ui-upgrade-verticalRealEstate.png) +![comparison of new proposed changes with existing](../img/ui-upgrade-verticalRealEstate.png) The second change is to introduce breadcrumb and page title elements consistently on all pages. This change is expected to have a positive usability and a11y impact. diff --git a/rsts/community/index.rst b/rsts/community/index.rst index c3d5850f802..5a9d9b620d1 100644 --- a/rsts/community/index.rst +++ b/rsts/community/index.rst @@ -39,7 +39,7 @@ Open Source Community Meeting ----------------------------- When: every other Tuesday, 9:00 AM Pacific Time. -You're welcome to join and learn from other commmunity members sharing their experiences with Flyte or any other technology from the AI ecosystem. +You're welcome to join and learn from other community members sharing their experiences with Flyte or any other technology from the AI ecosystem. Check out the event details and add it to your `calendar `_, or just pop in! .. image:: https://img.shields.io/badge/Join-Zoom-blue?style=for-the-badge diff --git a/rsts/community/troubleshoot.rst b/rsts/community/troubleshoot.rst index 9821d410176..b4f6c271d4c 100644 --- a/rsts/community/troubleshoot.rst +++ b/rsts/community/troubleshoot.rst @@ -55,7 +55,7 @@ This issue is more common on MacOS devices. Make sure that your Docker daemon ha limits: memory: 1Gi -- Also, the default container resource limits are can be overriden from the task itself: +- Also, the default container resource limits are can be overridden from the task itself: .. code-block:: python diff --git a/rsts/concepts/console.rst b/rsts/concepts/console.rst index 4cdf9614b17..d872f8990cb 100644 --- a/rsts/concepts/console.rst +++ b/rsts/concepts/console.rst @@ -98,7 +98,7 @@ Debug Output ============ This application makes use of the `debug `_ -libary to provide namespaced debug output in the browser console. In +library to provide namespaced debug output in the browser console. In development, all debug output is enabled. For other environments, the debug output must be enabled manually. You can do this by setting a flag in localStorage using the console: ``localStorage.debug = 'flyte:*'``. Each module in diff --git a/rsts/concepts/workflow_lifecycle.rst b/rsts/concepts/workflow_lifecycle.rst index 7c79d45887e..efb11e52d8c 100644 --- a/rsts/concepts/workflow_lifecycle.rst +++ b/rsts/concepts/workflow_lifecycle.rst @@ -51,7 +51,7 @@ In the background Flyte will provide all the needed infrastructure such that by As mentioned earlier some part of plugin logic lives on the SDK. In this case think of ``Spark`` data class here as a placeholder for all the Spark settings that we need our plugin to know. We need to pass this -data accross multiple places. This is the config that Flyte operator (Flytepropeller) +data across multiple places. This is the config that Flyte operator (Flytepropeller) will need in order to build the needed spark cluster. ``Spark`` class also tells Flytekit’s SDK that this task will run as a ``PysparkFunctionTask`` because ``task_config`` points to a ``Spark`` object instance, this is @@ -61,7 +61,7 @@ background `__ . Package step is carried out by the sdk tooling you are using. @@ -126,7 +126,7 @@ as defined in ``FlyteIDL``. "hello_spark" ] -This representation is generated within Flytekit. Esentially the SDK is +This representation is generated within Flytekit. Essentially the SDK is generating the instructions that Flyte’s kubernetes operator needs to know in order to run this task at a later stage. @@ -143,7 +143,7 @@ Spark plugin expects all its particular settings in this field i.e: Spark workers, driver memory etc. `Container `__ -is part of Flyte’s IDL primitives. Esentially any Flyte task is ran as +is part of Flyte’s IDL primitives. Essentially any Flyte task is ran as either three primitives a ``Container`` a ``K8sPod`` or ``Sql``. Every task contains a ``Target`` which has to be either of these. In this particular case, our Spark cluster is a ``Container`` target. A @@ -173,12 +173,12 @@ For more information on why this task contains these fields check ``TaskTemplate`` in `FlyteIDL repository `__. I strongly advice you to take a look at the data structures in this file -as they provide good insight in the interfaces used all accross Flyte’s +as they provide good insight in the interfaces used all across Flyte’s codebases. 3. Once user has packaged workflows and tasks then a registration step is needed. During registration Flyte adds these protocolbuffer files to its - database, esentially making these tasks and workflows runnable for + database, essentially making these tasks and workflows runnable for the user. Registration is done via `Flytectl ` __ 4. At somepoint a Flyte user will trigger a Workflow run. The workflow @@ -224,7 +224,7 @@ method is where magic happens. At task runtime: - Calls `dispatch_execute `__ . This trigger the execution of our spark task. - - `PysparkFunctionTask `__. defines what gets run just before the user's task code gets executed. It esentially creatse a spark session and then run the user function (The actual code we want to run!). + - `PysparkFunctionTask `__. defines what gets run just before the user's task code gets executed. It essentially creatse a spark session and then run the user function (The actual code we want to run!). ------------ diff --git a/rsts/deployment/configuration/auth_setup.rst b/rsts/deployment/configuration/auth_setup.rst index c481c7f2d24..61a6b4c0aef 100644 --- a/rsts/deployment/configuration/auth_setup.rst +++ b/rsts/deployment/configuration/auth_setup.rst @@ -199,7 +199,7 @@ Apply OIDC Configuration * ```` is the name of your Helm release, typically ``flyte-backend``. You can find it using ``helm ls -n `` - 6. Verify that your Flyte deployment now requires succesful login to your IdP to access the UI (``https:///console``) + 6. Verify that your Flyte deployment now requires successful login to your IdP to access the UI (``https:///console``) 7. For ``flytectl`` / ``pyflyte``, make sure that your local config file (``$HOME/.flyte/config.yaml``) includes the following option: @@ -658,7 +658,7 @@ Follow the steps in this section to configure `flyteadmin` to use an external au **Congratulations** - At this point, every interaction with Flyte components -be it in the UI or CLI- should require a succesful login to your IdP, where your security policies are maintained and enforced. + At this point, every interaction with Flyte components -be it in the UI or CLI- should require a successful login to your IdP, where your security policies are maintained and enforced. Disable Helm secret management diff --git a/rsts/deployment/configuration/general.rst b/rsts/deployment/configuration/general.rst index c168051afa8..32e162afe89 100644 --- a/rsts/deployment/configuration/general.rst +++ b/rsts/deployment/configuration/general.rst @@ -245,7 +245,7 @@ Hierarchy ********* Increasing specificity defines how matchable resource attributes get applied. -The available configurations, in order of decreasing specifity are: +The available configurations, in order of decreasing specificity are: #. Domain, Project, Workflow name, and LaunchPlan diff --git a/rsts/deployment/configuration/performance.rst b/rsts/deployment/configuration/performance.rst index c5e8de2cccb..9d6f71ea8b0 100644 --- a/rsts/deployment/configuration/performance.rst +++ b/rsts/deployment/configuration/performance.rst @@ -69,7 +69,7 @@ Let us first look at various config properties that can be set and would impact - Description * - ``workers`` - propeller - - Larger the number, implies more workflows can be evaluated in parallel. But it should depend on number of CPU cores assigned to FlytePropeller and evaluated against the cost of context swtiching. A number usually < 500 - 800 with 4-8 cpu cores works fine. + - Larger the number, implies more workflows can be evaluated in parallel. But it should depend on number of CPU cores assigned to FlytePropeller and evaluated against the cost of context switching. A number usually < 500 - 800 with 4-8 cpu cores works fine. - Number of `logical threads` workers, that can work concurrently. Also implies number of workflows that can be executed in parallel. Since FlytePropeller uses go-routines, it can run way more than number of physical cores. * - ``workflow-reeval-duration`` - propeller @@ -101,7 +101,7 @@ Let us first look at various config properties that can be set and would impact - It is essential to limit the number of writes from FlytePropeller to flyteadmin to prevent brown-outs or request throttling at the server. Also the cache reduces number of calls to the server. * - ``tasks.backoff.max-duration`` - propeller - - This config is used to configure the maximum back-off interval incase of resource-quota errors + - This config is used to configure the maximum back-off interval in case of resource-quota errors - FlytePropeller will automatically back-off when k8s or other services request it to slowdown or when desired quotas are met. * - ``max-parallelism`` - admin, per workflow, per execution @@ -147,9 +147,9 @@ The worst case for FlytePropeller is workflows that have an extremely large fan- A solution for this is to limit the maximum number of nodes that can be evaluated. This can be done by setting max-parallelism for an execution. This can done in multiple ways -#. Platform default: This allows to set platform-wide defaults for maximum concurrency within a Workflow execution. This can be overriden per Launch plan or per execution. +#. Platform default: This allows to set platform-wide defaults for maximum concurrency within a Workflow execution. This can be overridden per Launch plan or per execution. The default `maxParallelism is configured to be 25 `_. - It can be overriden with this config block in flyteadmin + It can be overridden with this config block in flyteadmin .. code-block:: yaml @@ -167,19 +167,19 @@ This can done in multiple ways max_parallelism=30, ) -#. Specify for an execution. For any specific execution the max-parallelism can be overriden. This can be done using flytectl (and soon flyteconsole). Refer to :std:ref:`flyteCtl docs ` +#. Specify for an execution. For any specific execution the max-parallelism can be overridden. This can be done using flytectl (and soon flyteconsole). Refer to :std:ref:`flyteCtl docs ` Scaling out FlyteAdmin ======================= -FlyteAdmin is a stateless service. Often time before needing to scale FlyteAdmin, you need to scale the backing database. Check out the FlyteAdmin Dashboard to see signs of latency degredation and increase the size of backing postgres instance. +FlyteAdmin is a stateless service. Often time before needing to scale FlyteAdmin, you need to scale the backing database. Check out the FlyteAdmin Dashboard to see signs of latency degradation and increase the size of backing postgres instance. FlyteAdmin is a stateless service and its replicas (in the kubernetes deployment) can be simply increased to allow higher throughput. Scaling out Datacatalog ======================== -Datacatalog is a stateless service. Often time before needing to scale Datacatalog, you need to scale the backing database. Check out the Datacatalog Dashboard to see signs of latency degredation and increase the size of backing postgres instance. +Datacatalog is a stateless service. Often time before needing to scale Datacatalog, you need to scale the backing database. Check out the Datacatalog Dashboard to see signs of latency degradation and increase the size of backing postgres instance. Datacatalog is a stateless service and its replicas (in the kubernetes deployment) can be simply increased to allow higher throughput. Scaling out FlytePropeller @@ -191,11 +191,11 @@ FlytePropeller can be run manually per namespace. This is not a recommended solu Automatic scale-out ------------------- -FlytePropeller Manager is a new component introduced as part of `this RFC `_ to facilitate horizontal scaling of FlytePropeller through sharding. Effectively, the Manager is responsible for maintaining liveness and proper configuration over a collection of FlytePropeller instances. This scheme uses k8s label selectors to deterministically assign FlyteWorkflow CRD responsibilites to FlytePropeller instances, effectively distributing processing load over the shards. +FlytePropeller Manager is a new component introduced as part of `this RFC `_ to facilitate horizontal scaling of FlytePropeller through sharding. Effectively, the Manager is responsible for maintaining liveness and proper configuration over a collection of FlytePropeller instances. This scheme uses k8s label selectors to deterministically assign FlyteWorkflow CRD responsibilities to FlytePropeller instances, effectively distributing processing load over the shards. Deployment of FlytePropeller Manager requires k8s configuration updates including a modified FlytePropeller Deployment and a new PodTemplate defining managed FlytePropeller instances. The easiest way to apply these updates is by setting the "flytepropeller.manager" value to "true" in the `helm deployment `_ and setting the manager config at "configmap.core.manager". -Flyte provides a variety of Shard Strategies to configure how FlyteWorkflows are sharded among managed FlytePropeller instances. These include hash, which uses consitent hashing to load-balance evaluation over shards, and project / domain, which map the respective IDs to specific managed FlytePropeller instances. Below we include examples of helm configurations for each of the existing Shard Strategies. +Flyte provides a variety of Shard Strategies to configure how FlyteWorkflows are sharded among managed FlytePropeller instances. These include hash, which uses consistent hashing to load-balance evaluation over shards, and project / domain, which map the respective IDs to specific managed FlytePropeller instances. Below we include examples of helm configurations for each of the existing Shard Strategies. The Hash Shard Strategy, denoted by "type: hash" in the configuration below, uses consistent hashing to evenly distribute FlyteWorkflows over managed FlytePropeller instances. This configuration requires a "shard-count" variable which defines the number of managed FlytePropeller instances. @@ -259,6 +259,6 @@ Offloading Static Workflow Information from CRD Flyte uses a k8s CRD (Custom Resource Definition) to store and track workflow executions. This resource includes the workflow definition, for example tasks and subworkflows that are involved and the dependencies between nodes, but also includes the execution status of the workflow. The latter information (ie. runtime status) is dynamic, meaning changes during the workflow's execution as nodes transition phases and the workflow execution progresses. However, the former information (ie. workflow definition) remains static, meaning it will never change and is only consulted to retrieve node definitions and workflow dependencies. -CRDs are stored within etcd, a key-value datastore heavily used in kubernetes. Etcd requires a complete rewrite of the value data everytime a single field changes. Consequently, the read / write performance of etcd, as with all key-value stores, is strongly correlated with the size of the data. In Flyte's case, to guarantee only-once execution of nodes we need to persist workflow state by updating the CRD at every node phase change. As the size of a workflow increases this means we are frequently rewritting a large CRD. In addition to poor read / write performance in etcd this update may be restricted by a hard limit on the overall CRD size. +CRDs are stored within etcd, a key-value datastore heavily used in kubernetes. Etcd requires a complete rewrite of the value data every time a single field changes. Consequently, the read / write performance of etcd, as with all key-value stores, is strongly correlated with the size of the data. In Flyte's case, to guarantee only-once execution of nodes we need to persist workflow state by updating the CRD at every node phase change. As the size of a workflow increases this means we are frequently rewriting a large CRD. In addition to poor read / write performance in etcd this update may be restricted by a hard limit on the overall CRD size. To counter the challenges of large FlyteWorkflow CRDs Flyte includes a configuration option to offload the static portions of the CRD (ie. workflow / task / subworkflow definitions and node dependencies) to the blobstore. This functionality can be enabled by setting the ``useOffloadedWorkflowClosure`` option to ``true`` in the `FlyteAdmin configuration `_. When set, the FlyteWorkflow CRD will populate a ``WorkflowClosureReference`` field on the CRD with the location of the static data and FlytePropeller will read this information (through a cache) during each workflow evaluation. One important note is that currently this requires FlyteAdmin and FlytePropeller to have access to the same blobstore since FlyteAdmin only specifies a blobstore location in the CRD. diff --git a/rsts/index.rst b/rsts/index.rst index 2821c89218b..6ea3176607b 100644 --- a/rsts/index.rst +++ b/rsts/index.rst @@ -118,7 +118,7 @@ and ML Engineers in the industry use Flyte to create: - ETL pipelines for petabyte-scale data processing. - Analytics workflows for business and finance use cases. -- Machine learning pipelines for logistics, image processsing, and cancer diagnostics. +- Machine learning pipelines for logistics, image processing, and cancer diagnostics. Explore Flyte ============= diff --git a/script/generate_helm.sh b/script/generate_helm.sh index a41e39d8c4f..183b22debc8 100755 --- a/script/generate_helm.sh +++ b/script/generate_helm.sh @@ -54,7 +54,7 @@ ${GOPATH:-~/go}/bin/helm-docs -c ${DIR}/../charts/ if [ -n "$DELTA_CHECK" ]; then DIRTY=$(git status --porcelain) if [ -n "$DIRTY" ]; then - echo "FAILED: helm code updated without commiting generated code." + echo "FAILED: helm code updated without committing generated code." echo "Ensure make helm has run and all changes are committed." DIFF=$(git diff) echo "diff detected: $DIFF" diff --git a/script/generate_kustomize.sh b/script/generate_kustomize.sh index 52700e1db5d..81a1142e009 100755 --- a/script/generate_kustomize.sh +++ b/script/generate_kustomize.sh @@ -29,7 +29,7 @@ done if [ -n "$DELTA_CHECK" ]; then DIRTY=$(git status --porcelain) if [ -n "$DIRTY" ]; then - echo "FAILED: kustomize code updated without commiting generated code." + echo "FAILED: kustomize code updated without committing generated code." echo "Ensure make kustomize has run and all changes are committed." DIFF=$(git diff) echo "diff detected: $DIFF"