diff --git a/.github/workflows/master_only.yml b/.github/workflows/master_only.yml index 295e7b17e2..1d6850e4d8 100644 --- a/.github/workflows/master_only.yml +++ b/.github/workflows/master_only.yml @@ -142,7 +142,7 @@ jobs: SNOWFLAKE_CI_WAREHOUSE: ${{ secrets.SNOWFLAKE_CI_WAREHOUSE }} run: pytest --verbose --color=yes sdk/python/tests --integration --benchmark --benchmark-autosave --benchmark-save-data --durations=5 - name: Upload Benchmark Artifact to S3 - run: aws s3 cp --recursive .benchmarks s3://feast-ci-pytest-benchmarks + run: aws s3 cp --recursive .benchmarks s3://feast-ci-pytest-benchmark build-all-docker-images: if: github.repository == 'feast-dev/feast' diff --git a/Makefile b/Makefile index c45e873fc7..869e2fa0ec 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ install-python-ci-dependencies-uv: python setup.py build_python_protos --inplace lock-python-ci-dependencies: - python -m piptools compile -U --extra ci --output-file sdk/python/requirements/py$(PYTHON)-ci-requirements.txt + uv pip compile --system --no-strip-extras setup.py --extra ci --output-file sdk/python/requirements/py$(PYTHON)-ci-requirements.txt package-protos: cp -r ${ROOT_DIR}/protos ${ROOT_DIR}/sdk/python/feast/protos @@ -60,13 +60,15 @@ install-python: python setup.py develop lock-python-dependencies: - python -m piptools compile -U --output-file sdk/python/requirements/py$(PYTHON)-requirements.txt + uv pip compile --system --no-strip-extras setup.py --output-file sdk/python/requirements/py$(PYTHON)-requirements.txt lock-python-dependencies-all: - pixi run --environment py39 --manifest-path infra/scripts/pixi/pixi.toml "python -m piptools compile -U --output-file sdk/python/requirements/py3.9-requirements.txt" - pixi run --environment py39 --manifest-path infra/scripts/pixi/pixi.toml "python -m piptools compile -U --extra ci --output-file sdk/python/requirements/py3.9-ci-requirements.txt" - pixi run --environment py310 --manifest-path infra/scripts/pixi/pixi.toml "python -m piptools compile -U --output-file sdk/python/requirements/py3.10-requirements.txt" - pixi run --environment py310 --manifest-path infra/scripts/pixi/pixi.toml "python -m piptools compile -U --extra ci --output-file sdk/python/requirements/py3.10-ci-requirements.txt" + pixi run --environment py39 --manifest-path infra/scripts/pixi/pixi.toml "uv pip compile --system --no-strip-extras setup.py --output-file sdk/python/requirements/py3.9-requirements.txt" + pixi run --environment py39 --manifest-path infra/scripts/pixi/pixi.toml "uv pip compile --system --no-strip-extras setup.py --extra ci --output-file sdk/python/requirements/py3.9-ci-requirements.txt" + pixi run --environment py310 --manifest-path infra/scripts/pixi/pixi.toml "uv pip compile --system --no-strip-extras setup.py --output-file sdk/python/requirements/py3.10-requirements.txt" + pixi run --environment py310 --manifest-path infra/scripts/pixi/pixi.toml "uv pip compile --system --no-strip-extras setup.py --extra ci --output-file sdk/python/requirements/py3.10-ci-requirements.txt" + pixi run --environment py311 --manifest-path infra/scripts/pixi/pixi.toml "uv pip compile --system --no-strip-extras setup.py --output-file sdk/python/requirements/py3.11-requirements.txt" + pixi run --environment py311 --manifest-path infra/scripts/pixi/pixi.toml "uv pip compile --system --no-strip-extras setup.py --extra ci --output-file sdk/python/requirements/py3.11-ci-requirements.txt" benchmark-python: FEAST_USAGE=False IS_TEST=True python -m pytest --integration --benchmark 
--benchmark-autosave --benchmark-save-data sdk/python/tests @@ -183,7 +185,7 @@ test-python-universal-athena: ATHENA_DATA_SOURCE=AwsDataCatalog \ ATHENA_DATABASE=default \ ATHENA_WORKGROUP=primary \ - ATHENA_S3_BUCKET_NAME=feast-integration-tests \ + ATHENA_S3_BUCKET_NAME=feast-int-bucket \ python -m pytest -n 8 --integration \ -k "not test_go_feature_server and \ not test_logged_features_validation and \ @@ -319,6 +321,25 @@ test-python-universal-cassandra-no-cloud-providers: not test_snowflake" \ sdk/python/tests + test-python-universal-elasticsearch-online: + PYTHONPATH='.' \ + FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.online_stores.contrib.elasticsearch_repo_configuration \ + PYTEST_PLUGINS=sdk.python.tests.integration.feature_repos.universal.online_store.elasticsearch \ + python -m pytest -n 8 --integration \ + -k "not test_universal_cli and \ + not test_go_feature_server and \ + not test_feature_logging and \ + not test_reorder_columns and \ + not test_logged_features_validation and \ + not test_lambda_materialization_consistency and \ + not test_offline_write and \ + not test_push_features_to_offline_store and \ + not gcs_registry and \ + not s3_registry and \ + not test_universal_types and \ + not test_snowflake" \ + sdk/python/tests + test-python-universal: python -m pytest -n 8 --integration sdk/python/tests @@ -370,9 +391,6 @@ kill-trino-locally: install-protoc-dependencies: pip install --ignore-installed protobuf==4.24.0 "grpcio-tools>=1.56.2,<2" mypy-protobuf==3.1.0 -install-feast-ci-locally: - pip install -e ".[ci]" - # Docker build-docker: build-feature-server-python-aws-docker build-feature-transformation-server-docker build-feature-server-java-docker diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 7b3c1a60b6..d2e03fe9a8 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -81,6 +81,7 @@ * [Snowflake](reference/offline-stores/snowflake.md) * [BigQuery](reference/offline-stores/bigquery.md) * [Redshift](reference/offline-stores/redshift.md) + * [DuckDB](reference/offline-stores/duckdb.md) * [Spark (contrib)](reference/offline-stores/spark.md) * [PostgreSQL (contrib)](reference/offline-stores/postgres.md) * [Trino (contrib)](reference/offline-stores/trino.md) diff --git a/docs/project/development-guide.md b/docs/project/development-guide.md index 2d4ab0c7c6..e3b09294bc 100644 --- a/docs/project/development-guide.md +++ b/docs/project/development-guide.md @@ -123,43 +123,44 @@ Note that this means if you are midway through working through a PR and rebase, Setting up your development environment for Feast Python SDK / CLI: 1. Ensure that you have Docker installed in your environment. Docker is used to provision service dependencies during testing, and build images for feature servers and other components. - Please note that we use [Docker with BuiltKit](https://docs.docker.com/develop/develop-images/build_enhancements/). -2. Ensure that you have `make`, Python (3.8 and above) with `pip`, installed. + - _Alternatively_ - To use [podman](https://podman.io/) on a Fedora or RHEL machine, follow this [guide](https://github.com/feast-dev/feast/issues/4190) +2. Ensure that you have `make` and Python (3.9 or above) installed. 3. _Recommended:_ Create a virtual environment to isolate development dependencies to be installed ```sh # create & activate a virtual environment python -m venv venv/ source venv/bin/activate ``` -4. Upgrade `pip` if outdated - ```sh - pip install --upgrade pip - ``` -5. 
(M1 Mac only): Follow the [dev guide](https://github.com/feast-dev/feast/issues/2105) -6. Install pip-tools - ```sh - pip install pip-tools - ``` -7. (Optional): Install Node & Yarn. Then run the following to build Feast UI artifacts for use in `feast ui` +4. (M1 Mac only): Follow the [dev guide](https://github.com/feast-dev/feast/issues/2105) +5. Install uv +It is recommended to use uv for managing Python dependencies. +```sh +curl -LsSf https://astral.sh/uv/install.sh | sh +``` +or +```sh +pip install uv +``` +6. (Optional): Install Node & Yarn. Then run the following to build Feast UI artifacts for use in `feast ui` ``` make build-ui ``` -8. Install mysql (needed for ci dependencies) +7. (Optional): Install pixi +pixi is needed to run step 8 for all Python versions at once. ```sh -brew install mysql +curl -fsSL https://pixi.sh/install.sh | bash ``` -9. Install development dependencies for Feast Python SDK / CLI +8. (Optional): Recompile Python lock files +If you make changes to requirements or simply want to update the Python lock files to reflect the latest versions, run: ```sh -pip install -e ".[dev]" -``` - -This will allow the installed feast version to automatically reflect changes to your local development version of Feast without needing to reinstall everytime you make code changes. - -10. Compile the protubufs +make lock-python-dependencies-all +``` +9. Install development dependencies for Feast Python SDK / CLI +This will install package versions from the lock file, install an editable version of Feast, and compile protobufs. ```sh -make compile-protos-python +make install-python-ci-dependencies-uv ``` - -11. Spin up Docker Image +10. Spin up Docker Image ```sh docker build -t docker-whale -f ./sdk/python/feast/infra/feature_servers/multicloud/Dockerfile . ```
diff --git a/docs/reference/alpha-vector-database.md b/docs/reference/alpha-vector-database.md new file mode 100644 index 0000000000..37d9b9cdf8 --- /dev/null +++ b/docs/reference/alpha-vector-database.md @@ -0,0 +1,111 @@ +# [Alpha] Vector Database +**Warning**: This is an _experimental_ feature. To our knowledge, this is stable, but there are still rough edges in the experience. Contributions are welcome! + +## Overview +A vector database allows users to store and retrieve embeddings. Feast provides general APIs to store and retrieve embeddings. + +## Integration +Below are supported vector databases and implemented features: + +| Vector Database | Retrieval | Indexing | +|-----------------|-----------|----------| +| Pgvector | [x] | [ ] | +| Elasticsearch | [x] | [x] | +| Milvus | [ ] | [ ] | +| Faiss | [ ] | [ ] | + + +## Example + +See [https://github.com/feast-dev/feast-workshop/blob/rag/module_4_rag](https://github.com/feast-dev/feast-workshop/blob/rag/module_4_rag) for an example of how to use a vector database. + +### **Prepare offline embedding dataset** +Run the following commands to prepare the embedding dataset: +```shell +python pull_states.py +python batch_score_documents.py +``` +The output will be stored in `data/city_wikipedia_summaries.csv`. + +### **Initialize Feast feature store and materialize the data to the online store** +Use the `feature_store.yaml` file to initialize the feature store. This will use the local file as the offline store and Pgvector as the online store.
+ +```yaml +project: feast_demo_local +provider: local +registry: + registry_type: sql + path: postgresql://@localhost:5432/feast +online_store: + type: postgres + pgvector_enabled: true + vector_len: 384 + host: 127.0.0.1 + port: 5432 + database: feast + user: "" + password: "" + + +offline_store: + type: file +entity_key_serialization_version: 2 +``` +Run the following command in terminal to apply the feature store configuration: + +```shell +feast apply +``` + +Note that when you run `feast apply` you are going to apply the following Feature View that we will use for retrieval later: + +```python +city_embeddings_feature_view = FeatureView( + name="city_embeddings", + entities=[item], + schema=[ + Field(name="Embeddings", dtype=Array(Float32)), + ], + source=source, + ttl=timedelta(hours=2), +) +``` + +Then run the following command in the terminal to materialize the data to the online store: + +```shell +CURRENT_TIME=$(date -u +"%Y-%m-%dT%H:%M:%S") +feast materialize-incremental $CURRENT_TIME +``` + +### **Prepare a query embedding** +```python +from batch_score_documents import run_model, TOKENIZER, MODEL +from transformers import AutoTokenizer, AutoModel + +question = "the most populous city in the U.S. state of Texas?" + +tokenizer = AutoTokenizer.from_pretrained(TOKENIZER) +model = AutoModel.from_pretrained(MODEL) +query_embedding = run_model(question, tokenizer, model) +query = query_embedding.detach().cpu().numpy().tolist()[0] +``` + +### **Retrieve the top 5 similar documents** +First create a feature store instance, and use the `retrieve_online_documents` API to retrieve the top 5 similar documents to the specified query. + +```python +from feast import FeatureStore +store = FeatureStore(repo_path=".") +features = store.retrieve_online_documents( + feature="city_embeddings:Embeddings", + query=query, + top_k=5 +).to_dict() + +def print_online_features(features): + for key, value in sorted(features.items()): + print(key, " : ", value) + +print_online_features(features) +``` \ No newline at end of file diff --git a/docs/reference/data-sources/file.md b/docs/reference/data-sources/file.md index 5895b1a8ce..d3fd09deca 100644 --- a/docs/reference/data-sources/file.md +++ b/docs/reference/data-sources/file.md @@ -3,11 +3,7 @@ ## Description File data sources are files on disk or on S3. -Currently only Parquet files are supported. - -{% hint style="warning" %} -FileSource is meant for development purposes only and is not optimized for production use. -{% endhint %} +Currently only Parquet and Delta formats are supported. ## Example diff --git a/docs/reference/data-sources/overview.md b/docs/reference/data-sources/overview.md index 302c19b049..5c2fdce9fd 100644 --- a/docs/reference/data-sources/overview.md +++ b/docs/reference/data-sources/overview.md @@ -2,8 +2,8 @@ ## Functionality -In Feast, each batch data source is associated with a corresponding offline store. -For example, a `SnowflakeSource` can only be processed by the Snowflake offline store. +In Feast, each batch data source is associated with corresponding offline stores. +For example, a `SnowflakeSource` can only be processed by the Snowflake offline store, while a `FileSource` can be processed by both File and DuckDB offline stores. Otherwise, the primary difference between batch data sources is the set of supported types. 
Feast has an internal type system, and aims to support eight primitive types (`bytes`, `string`, `int32`, `int64`, `float32`, `float64`, `bool`, and `timestamp`) along with the corresponding array types. However, not every batch data source supports all of these types.
diff --git a/docs/reference/offline-stores/README.md b/docs/reference/offline-stores/README.md index 149dac9101..77d09c8f14 100644 --- a/docs/reference/offline-stores/README.md +++ b/docs/reference/offline-stores/README.md @@ -22,6 +22,10 @@ Please see [Offline Store](../../getting-started/architecture-and-components/off [redshift.md](redshift.md) {% endcontent-ref %} +{% content-ref url="duckdb.md" %} +[duckdb.md](duckdb.md) +{% endcontent-ref %} + {% content-ref url="spark.md" %} [spark.md](spark.md) {% endcontent-ref %}
diff --git a/docs/reference/offline-stores/duckdb.md b/docs/reference/offline-stores/duckdb.md new file mode 100644 index 0000000000..da3c3cd0c7 --- /dev/null +++ b/docs/reference/offline-stores/duckdb.md @@ -0,0 +1,56 @@ +# DuckDB offline store + +## Description + +The DuckDB offline store provides support for reading [FileSources](../data-sources/file.md). It can read both Parquet and Delta formats. The DuckDB offline store uses [Ibis](https://ibis-project.org/) under the hood to convert offline store operations into DuckDB queries. + +* Entity dataframes can be provided as a Pandas dataframe. + +## Getting started +In order to use this offline store, you'll need to run `pip install 'feast[duckdb]'`. + +## Example + +{% code title="feature_store.yaml" %} +```yaml +project: my_project +registry: data/registry.db +provider: local +offline_store: + type: duckdb +online_store: + path: data/online_store.db +``` +{% endcode %} + +## Functionality Matrix + +The set of functionality supported by offline stores is described in detail [here](overview.md#functionality). +Below is a matrix indicating which functionality is supported by the DuckDB offline store. + +| | DuckDB | +| :----------------------------------------------------------------- | :---- | +| `get_historical_features` (point-in-time correct join) | yes | +| `pull_latest_from_table_or_query` (retrieve latest feature values) | yes | +| `pull_all_from_table_or_query` (retrieve a saved dataset) | yes | +| `offline_write_batch` (persist dataframes to offline store) | yes | +| `write_logged_features` (persist logged features to offline store) | yes | + +Below is a matrix indicating which functionality is supported by `IbisRetrievalJob`. + +| | DuckDB | +| ----------------------------------------------------- | ----- | +| export to dataframe | yes | +| export to arrow table | yes | +| export to arrow batches | no | +| export to SQL | no | +| export to data lake (S3, GCS, etc.) | no | +| export to data warehouse | no | +| export as Spark dataframe | no | +| local execution of Python-based on-demand transforms | yes | +| remote execution of Python-based on-demand transforms | no | +| persist results in the offline store | yes | +| preview the query plan before execution | no | +| read partitioned data | yes | + +To compare this set of functionality against other offline stores, please see the full [functionality matrix](overview.md#functionality-matrix).
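To make the DuckDB configuration above concrete, here is a minimal usage sketch (not part of the diff) of a point-in-time retrieval against a Parquet `FileSource` with the DuckDB offline store; the file path, entity, and feature names are hypothetical.

```python
# Minimal sketch: point-in-time retrieval with the DuckDB offline store.
# Assumes feature_store.yaml sets `offline_store: type: duckdb` as shown above;
# the path, entity, and feature names below are hypothetical.
from datetime import timedelta

import pandas as pd

from feast import Entity, FeatureStore, FeatureView, Field, FileSource
from feast.types import Float32

driver = Entity(name="driver", join_keys=["driver_id"])

driver_stats = FileSource(
    name="driver_stats",
    path="data/driver_stats.parquet",  # Parquet (or Delta) files are supported
    timestamp_field="event_timestamp",
)

driver_stats_fv = FeatureView(
    name="driver_hourly_stats",
    entities=[driver],
    ttl=timedelta(days=1),
    schema=[Field(name="conv_rate", dtype=Float32)],
    source=driver_stats,
)

store = FeatureStore(repo_path=".")
store.apply([driver, driver_stats_fv])

# Entity dataframes can be provided as a Pandas dataframe.
entity_df = pd.DataFrame(
    {
        "driver_id": [1001, 1002],
        "event_timestamp": pd.to_datetime(["2024-04-01", "2024-04-02"]),
    }
)

training_df = store.get_historical_features(
    entity_df=entity_df,
    features=["driver_hourly_stats:conv_rate"],
).to_df()
```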
diff --git a/docs/reference/offline-stores/overview.md b/docs/reference/offline-stores/overview.md index 8ce9045496..4d7681e38c 100644 --- a/docs/reference/offline-stores/overview.md +++ b/docs/reference/offline-stores/overview.md @@ -42,17 +42,17 @@ Below is a matrix indicating which offline stores support which methods. Below is a matrix indicating which `RetrievalJob`s support what functionality. -| | File | BigQuery | Snowflake | Redshift | Postgres | Spark | Trino | -| --------------------------------- | --- | --- | --- | --- | --- | --- | --- | -| export to dataframe | yes | yes | yes | yes | yes | yes | yes | -| export to arrow table | yes | yes | yes | yes | yes | yes | yes | -| export to arrow batches | no | no | no | yes | no | no | no | -| export to SQL | no | yes | yes | yes | yes | no | yes | -| export to data lake (S3, GCS, etc.) | no | no | yes | no | yes | no | no | -| export to data warehouse | no | yes | yes | yes | yes | no | no | -| export as Spark dataframe | no | no | yes | no | no | yes | no | -| local execution of Python-based on-demand transforms | yes | yes | yes | yes | yes | no | yes | -| remote execution of Python-based on-demand transforms | no | no | no | no | no | no | no | -| persist results in the offline store | yes | yes | yes | yes | yes | yes | no | -| preview the query plan before execution | yes | yes | yes | yes | yes | yes | yes | -| read partitioned data | yes | yes | yes | yes | yes | yes | yes | +| | File | BigQuery | Snowflake | Redshift | Postgres | Spark | Trino | DuckDB | +| --------------------------------- | --- | --- | --- | --- | --- | --- | --- | --- | +| export to dataframe | yes | yes | yes | yes | yes | yes | yes | yes | +| export to arrow table | yes | yes | yes | yes | yes | yes | yes | yes | +| export to arrow batches | no | no | no | yes | no | no | no | no | +| export to SQL | no | yes | yes | yes | yes | no | yes | no | +| export to data lake (S3, GCS, etc.) 
| no | no | yes | no | yes | no | no | no | +| export to data warehouse | no | yes | yes | yes | yes | no | no | no | +| export as Spark dataframe | no | no | yes | no | no | yes | no | no | +| local execution of Python-based on-demand transforms | yes | yes | yes | yes | yes | no | yes | yes | +| remote execution of Python-based on-demand transforms | no | no | no | no | no | no | no | no | +| persist results in the offline store | yes | yes | yes | yes | yes | yes | no | yes | +| preview the query plan before execution | yes | yes | yes | yes | yes | yes | yes | no | +| read partitioned data | yes | yes | yes | yes | yes | yes | yes | yes |
diff --git a/docs/reference/offline-stores/redshift.md b/docs/reference/offline-stores/redshift.md index e9bcbfeff1..e33a1856cb 100644 --- a/docs/reference/offline-stores/redshift.md +++ b/docs/reference/offline-stores/redshift.md @@ -130,8 +130,8 @@ The following inline policy can be used to grant Redshift necessary permissions "Action": "s3:*", "Effect": "Allow", "Resource": [ - "arn:aws:s3:::feast-integration-tests", - "arn:aws:s3:::feast-integration-tests/*" + "arn:aws:s3:::feast-int-bucket", + "arn:aws:s3:::feast-int-bucket/*" ] } ],
diff --git a/docs/reference/online-stores/elasticsearch.md b/docs/reference/online-stores/elasticsearch.md new file mode 100644 index 0000000000..bf6f9a58db --- /dev/null +++ b/docs/reference/online-stores/elasticsearch.md @@ -0,0 +1,125 @@ +# ElasticSearch online store (contrib) + +## Description + +The ElasticSearch online store provides support for materializing tabular feature values, as well as embedding feature vectors, into an ElasticSearch index for serving online features. \ +The embedding feature vectors are stored as dense vectors, and can be used for similarity search. More information on dense vectors can be found [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html). + +## Getting started +In order to use this online store, you'll need to run `pip install 'feast[elasticsearch]'`. You can get started by then running `feast init -t elasticsearch`. + +## Example + +{% code title="feature_store.yaml" %} +```yaml +project: my_feature_repo +registry: data/registry.db +provider: local +online_store: + type: elasticsearch + host: ES_HOST + port: ES_PORT + user: ES_USERNAME + password: ES_PASSWORD + vector_len: 512 + write_batch_size: 1000 +``` +{% endcode %} + +The full set of configuration options is available in [ElasticsearchOnlineStoreConfig](https://rtd.feast.dev/en/master/#feast.infra.online_stores.contrib.elasticsearch.ElasticsearchOnlineStoreConfig). + +## Functionality Matrix + + +| | ElasticSearch | +| :-------------------------------------------------------- | :------- | +| write feature values to the online store | yes | +| read feature values from the online store | yes | +| update infrastructure (e.g. tables) in the online store | yes | +| teardown infrastructure (e.g.
tables) in the online store | yes | +| generate a plan of infrastructure changes | no | +| support for on-demand transforms | yes | +| readable by Python SDK | yes | +| readable by Java | no | +| readable by Go | no | +| support for entityless feature views | yes | +| support for concurrent writing to the same key | no | +| support for ttl (time to live) at retrieval | no | +| support for deleting expired data | no | +| collocated by feature view | yes | +| collocated by feature service | no | +| collocated by entity key | no | + +To compare this set of functionality against other online stores, please see the full [functionality matrix](overview.md#functionality-matrix). + +## Retrieving online document vectors + +The ElasticSearch online store supports retrieving document vectors for a given list of entity keys. The document vectors are returned as a dictionary where the key is the entity key and the value is the document vector. The document vector is a dense vector of floats. + +{% code title="python" %} +```python +from feast import FeatureStore + +feature_store = FeatureStore(repo_path=".") + +query_vector = [1.0, 2.0, 3.0, 4.0, 5.0] +top_k = 5 + +# Retrieve the top k closest features to the query vector + +feature_values = feature_store.retrieve_online_documents( + feature="my_feature", + query=query_vector, + top_k=top_k +) ``` {% endcode %} + +## Indexing +Currently, the indexing mapping in the ElasticSearch online store is configured as: + +{% code title="indexing_mapping" %} +```json +"properties": { + "entity_key": {"type": "binary"}, + "feature_name": {"type": "keyword"}, + "feature_value": {"type": "binary"}, + "timestamp": {"type": "date"}, + "created_ts": {"type": "date"}, + "vector_value": { + "type": "dense_vector", + "dims": config.online_store.vector_len, + "index": "true", + "similarity": config.online_store.similarity, + }, } ``` {% endcode %} And the online_read API mapping is configured as: + +{% code title="online_read_mapping" %} +```json +"query": { + "bool": { + "must": [ + {"terms": {"entity_key": entity_keys}}, + {"terms": {"feature_name": requested_features}}, + ] + } }, ``` {% endcode %} + +And the similarity search API mapping is configured as: + +{% code title="similarity_search_mapping" %} +```json +{ + "field": "vector_value", + "query_vector": embedding_vector, + "k": top_k, } ``` {% endcode %} + +These APIs are subject to change in future versions of Feast to improve performance and usability. \ No newline at end of file
diff --git a/docs/reference/online-stores/postgres.md b/docs/reference/online-stores/postgres.md index 34d4de3488..77a9408d2b 100644 --- a/docs/reference/online-stores/postgres.md +++ b/docs/reference/online-stores/postgres.md @@ -65,10 +65,16 @@ To compare this set of functionality against other online stores, please see the ## PGVector The Postgres online store supports the use of [PGVector](https://github.com/pgvector/pgvector) for storing feature values. -To enable PGVector, set `pgvector_enabled: true` in the online store configuration. +To enable PGVector, set `pgvector_enabled: true` in the online store configuration. + The `vector_len` parameter can be used to specify the length of the vector. The default value is 512. -Then you can use `retrieve_online_documents` to retrieve the top k closest vectors to a query vector. +Please make sure to follow the instructions in the pgvector repository, which, at the time of this writing, require you to +run `CREATE EXTENSION vector;` in the database.
+ + +Then you can use `retrieve_online_documents` to retrieve the top k closest vectors to a query vector. +For the Retrieval Augmented Generation (RAG) use-case, you have to embed the query prior to passing the query vector. {% code title="python" %} ```python diff --git a/infra/scripts/pixi/pixi.lock b/infra/scripts/pixi/pixi.lock index 65b761156e..19a32f32ae 100644 --- a/infra/scripts/pixi/pixi.lock +++ b/infra/scripts/pixi/pixi.lock @@ -1,6 +1,17 @@ version: 4 environments: default: + channels: + - url: https://conda.anaconda.org/conda-forge/ + packages: + linux-64: + - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_5.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_5.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h95c4c6d_6.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/uv-0.1.39-h0ea3d13_0.conda + py310: channels: - url: https://conda.anaconda.org/conda-forge/ packages: @@ -9,36 +20,25 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/click-8.1.7-unix_pyh707e725_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h95c4c6d_6.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4.20240210-h59595ed_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.1-hd590300_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/pip-tools-7.4.1-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/pyproject_hooks-1.0.0-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.12.3-hab00c5b_0_cpython.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/python-build-1.2.1-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/python-3.10.14-hd12c33a_0_cpython.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/uv-0.1.39-h0ea3d13_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda - py310: + py311: channels: - url: https://conda.anaconda.org/conda-forge/ packages: @@ -47,34 +47,25 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/click-8.1.7-unix_pyh707e725_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h55db66e_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_5.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_5.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-hc881cc4_6.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-hc881cc4_6.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h95c4c6d_6.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4.20240210-h59595ed_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.1-hd590300_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/pip-tools-7.4.1-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/pyproject_hooks-1.0.0-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.10.14-hd12c33a_0_cpython.conda - - conda: 
https://conda.anaconda.org/conda-forge/noarch/python-build-1.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.0-hd590300_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.11.9-hb806964_0_cpython.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/uv-0.1.39-h0ea3d13_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda py39: channels: - url: https://conda.anaconda.org/conda-forge/ @@ -84,34 +75,24 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/click-8.1.7-unix_pyh707e725_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h95c4c6d_6.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4.20240210-h59595ed_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.1-hd590300_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/pip-tools-7.4.1-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/pyproject_hooks-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.9.19-h0755675_0_cpython.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/python-build-1.2.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda 
- conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/uv-0.1.39-h0ea3d13_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda packages: - kind: conda name: _libgcc_mutex @@ -168,53 +149,6 @@ packages: license: ISC size: 155432 timestamp: 1706843687645 -- kind: conda - name: click - version: 8.1.7 - build: unix_pyh707e725_0 - subdir: noarch - noarch: python - url: https://conda.anaconda.org/conda-forge/noarch/click-8.1.7-unix_pyh707e725_0.conda - sha256: f0016cbab6ac4138a429e28dbcb904a90305b34b3fe41a9b89d697c90401caec - md5: f3ad426304898027fc619827ff428eca - depends: - - __unix - - python >=3.8 - license: BSD-3-Clause - license_family: BSD - size: 84437 - timestamp: 1692311973840 -- kind: conda - name: colorama - version: 0.4.6 - build: pyhd8ed1ab_0 - subdir: noarch - noarch: python - url: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2 - sha256: 2c1b2e9755ce3102bca8d69e8f26e4f087ece73f50418186aee7c74bef8e1698 - md5: 3faab06a954c2a04039983f2c4a50d99 - depends: - - python >=3.7 - license: BSD-3-Clause - license_family: BSD - size: 25170 - timestamp: 1666700778190 -- kind: conda - name: importlib-metadata - version: 7.1.0 - build: pyha770c72_0 - subdir: noarch - noarch: python - url: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda - sha256: cc2e7d1f7f01cede30feafc1118b7aefa244d0a12224513734e24165ae12ba49 - md5: 0896606848b2dc5cebdf111b6543aa04 - depends: - - python >=3.8 - - zipp >=0.5 - license: Apache-2.0 - license_family: APACHE - size: 27043 - timestamp: 1710971498183 - kind: conda name: ld_impl_linux-64 version: '2.40' @@ -229,6 +163,20 @@ packages: license_family: GPL size: 704696 timestamp: 1674833944779 +- kind: conda + name: ld_impl_linux-64 + version: '2.40' + build: h55db66e_0 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h55db66e_0.conda + sha256: ef969eee228cfb71e55146eaecc6af065f468cb0bc0a5239bc053b39db0b5f09 + md5: 10569984e7db886e4f1abc2b47ad79a1 + constrains: + - binutils_impl_linux-64 2.40 + license: GPL-3.0-only + license_family: GPL + size: 713322 + timestamp: 1713651222435 - kind: conda name: libexpat version: 2.6.2 @@ -278,6 +226,24 @@ packages: license_family: GPL size: 770506 timestamp: 1706819192021 +- kind: conda + name: libgcc-ng + version: 13.2.0 + build: hc881cc4_6 + build_number: 6 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-hc881cc4_6.conda + sha256: 836a0057525f1414de43642d357d0ab21ac7f85e24800b010dbc17d132e6efec + md5: df88796bd09a0d2ed292e59101478ad8 + depends: + - _libgcc_mutex 0.1 conda_forge + - _openmp_mutex >=4.5 + constrains: + - libgomp 13.2.0 hc881cc4_6 + license: GPL-3.0-only WITH GCC-exception-3.1 + license_family: GPL + size: 777315 + timestamp: 1713755001744 - kind: conda name: libgomp version: 13.2.0 @@ -293,6 +259,21 @@ packages: license_family: GPL size: 419751 timestamp: 1706819107383 +- kind: conda + name: libgomp + version: 13.2.0 + build: hc881cc4_6 + build_number: 6 + subdir: linux-64 + 
url: https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-hc881cc4_6.conda + sha256: e722b19b23b31a14b1592d5eceabb38dc52452ff5e4d346e330526971c22e52a + md5: aae89d3736661c36a5591788aebd0817 + depends: + - _libgcc_mutex 0.1 conda_forge + license: GPL-3.0-only WITH GCC-exception-3.1 + license_family: GPL + size: 422363 + timestamp: 1713754915251 - kind: conda name: libnsl version: 2.0.1 @@ -321,6 +302,19 @@ packages: license: Unlicense size: 859858 timestamp: 1713367435849 +- kind: conda + name: libstdcxx-ng + version: 13.2.0 + build: h95c4c6d_6 + build_number: 6 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h95c4c6d_6.conda + sha256: 2616dbf9d28431eea20b6e307145c6a92ea0328a047c725ff34b0316de2617da + md5: 3cfab3e709f77e9f1b3d380eb622494a + license: GPL-3.0-only WITH GCC-exception-3.1 + license_family: GPL + size: 3842900 + timestamp: 1713755068572 - kind: conda name: libuuid version: 2.38.1 @@ -398,73 +392,22 @@ packages: size: 2865379 timestamp: 1710793235846 - kind: conda - name: packaging - version: '24.0' - build: pyhd8ed1ab_0 - subdir: noarch - noarch: python - url: https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda - sha256: a390182d74c31dfd713c16db888c92c277feeb6d1fe96ff9d9c105f9564be48a - md5: 248f521b64ce055e7feae3105e7abeb8 + name: openssl + version: 3.3.0 + build: hd590300_0 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.0-hd590300_0.conda + sha256: fdbf05e4db88c592366c90bb82e446edbe33c6e49e5130d51c580b2629c0b5d5 + md5: c0f3abb4a16477208bbd43a39bd56f18 depends: - - python >=3.8 + - ca-certificates + - libgcc-ng >=12 + constrains: + - pyopenssl >=22.1 license: Apache-2.0 - license_family: APACHE - size: 49832 - timestamp: 1710076089469 -- kind: conda - name: pip - version: '24.0' - build: pyhd8ed1ab_0 - subdir: noarch - noarch: python - url: https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda - sha256: b7c1c5d8f13e8cb491c4bd1d0d1896a4cf80fc47de01059ad77509112b664a4a - md5: f586ac1e56c8638b64f9c8122a7b8a67 - depends: - - python >=3.7 - - setuptools - - wheel - license: MIT - license_family: MIT - size: 1398245 - timestamp: 1706960660581 -- kind: conda - name: pip-tools - version: 7.4.1 - build: pyhd8ed1ab_0 - subdir: noarch - noarch: python - url: https://conda.anaconda.org/conda-forge/noarch/pip-tools-7.4.1-pyhd8ed1ab_0.conda - sha256: 5534c19a6233faed1c9109782322c9d31e536ce20448f8c90db3d864fb8f226d - md5: 73203bd783da9c37c2cdabb1f3b9d44d - depends: - - click >=7 - - pip >=21.2 - - python >=3.7 - - python-build - - setuptools - - wheel - license: BSD-3-Clause - license_family: BSD - size: 54113 - timestamp: 1709736180083 -- kind: conda - name: pyproject_hooks - version: 1.0.0 - build: pyhd8ed1ab_0 - subdir: noarch - noarch: python - url: https://conda.anaconda.org/conda-forge/noarch/pyproject_hooks-1.0.0-pyhd8ed1ab_0.conda - sha256: 016340837fcfef57b351febcbe855eedf0c1f0ecfc910ed48c7fbd20535f9847 - md5: 21de50391d584eb7f4441b9de1ad773f - depends: - - python >=3.7 - - tomli >=1.1.0 - license: MIT - license_family: MIT - size: 13867 - timestamp: 1670268791173 + license_family: Apache + size: 2895187 + timestamp: 1714466138265 - kind: conda name: python version: 3.9.19 @@ -525,12 +468,12 @@ packages: timestamp: 1710939725109 - kind: conda name: python - version: 3.12.3 - build: hab00c5b_0_cpython + version: 3.11.9 + build: hb806964_0_cpython subdir: linux-64 - url: 
https://conda.anaconda.org/conda-forge/linux-64/python-3.12.3-hab00c5b_0_cpython.conda - sha256: f9865bcbff69f15fd89a33a2da12ad616e98d65ce7c83c644b92e66e5016b227 - md5: 2540b74d304f71d3e89c81209db4db84 + url: https://conda.anaconda.org/conda-forge/linux-64/python-3.11.9-hb806964_0_cpython.conda + sha256: 177f33a1fb8d3476b38f73c37b42f01c0b014fa0e039a701fd9f83d83aae6d40 + md5: ac68acfa8b558ed406c75e98d3428d7b depends: - bzip2 >=1.0.8,<2.0a0 - ld_impl_linux-64 >=2.36.1 @@ -538,7 +481,7 @@ packages: - libffi >=3.4,<4.0a0 - libgcc-ng >=12 - libnsl >=2.0.1,<2.1.0a0 - - libsqlite >=3.45.2,<4.0a0 + - libsqlite >=3.45.3,<4.0a0 - libuuid >=2.38.1,<3.0a0 - libxcrypt >=4.4.36 - libzlib >=1.2.13,<1.3.0a0 @@ -549,32 +492,10 @@ packages: - tzdata - xz >=5.2.6,<6.0a0 constrains: - - python_abi 3.12.* *_cp312 + - python_abi 3.11.* *_cp311 license: Python-2.0 - size: 31991381 - timestamp: 1713208036041 -- kind: conda - name: python-build - version: 1.2.1 - build: pyhd8ed1ab_0 - subdir: noarch - noarch: python - url: https://conda.anaconda.org/conda-forge/noarch/python-build-1.2.1-pyhd8ed1ab_0.conda - sha256: 3104051be7279d1b15f0a4be79f4bfeaf3a42b2900d24a7ad8e980df903fe8db - md5: d657cde3b3943fcedf6038138eea84de - depends: - - colorama - - importlib-metadata >=4.6 - - packaging >=19.0 - - pyproject_hooks - - python >=3.8 - - tomli >=1.1.0 - constrains: - - build <0 - license: MIT - license_family: MIT - size: 24434 - timestamp: 1711647439510 + size: 30884494 + timestamp: 1713553104915 - kind: conda name: readline version: '8.2' @@ -591,21 +512,6 @@ packages: license_family: GPL size: 281456 timestamp: 1679532220005 -- kind: conda - name: setuptools - version: 69.5.1 - build: pyhd8ed1ab_0 - subdir: noarch - noarch: python - url: https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda - sha256: 72d143408507043628b32bed089730b6d5f5445eccc44b59911ec9f262e365e7 - md5: 7462280d81f639363e6e63c81276bd9e - depends: - - python >=3.8 - license: MIT - license_family: MIT - size: 501790 - timestamp: 1713094963112 - kind: conda name: tk version: 8.6.13 @@ -622,21 +528,6 @@ packages: license_family: BSD size: 3318875 timestamp: 1699202167581 -- kind: conda - name: tomli - version: 2.0.1 - build: pyhd8ed1ab_0 - subdir: noarch - noarch: python - url: https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2 - sha256: 4cd48aba7cd026d17e86886af48d0d2ebc67ed36f87f6534f4b67138f5a5a58f - md5: 5844808ffab9ebdb694585b50ba02a96 - depends: - - python >=3.7 - license: MIT - license_family: MIT - size: 15940 - timestamp: 1644342331069 - kind: conda name: tzdata version: 2024a @@ -650,21 +541,19 @@ packages: size: 119815 timestamp: 1706886945727 - kind: conda - name: wheel - version: 0.43.0 - build: pyhd8ed1ab_1 - build_number: 1 - subdir: noarch - noarch: python - url: https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda - sha256: cb318f066afd6fd64619f14c030569faf3f53e6f50abf743b4c865e7d95b96bc - md5: 0b5293a157c2b5cd513dd1b03d8d3aae + name: uv + version: 0.1.39 + build: h0ea3d13_0 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/uv-0.1.39-h0ea3d13_0.conda + sha256: 763d149b6f4f5c70c91e4106d3a48409c48283ed2e27392578998fb2441f23d8 + md5: c3206e7ca254e50b3556917886f9b12b depends: - - python >=3.8 - license: MIT - license_family: MIT - size: 57963 - timestamp: 1711546009410 + - libgcc-ng >=12 + - libstdcxx-ng >=12 + license: Apache-2.0 OR MIT + size: 11891252 + timestamp: 1714233659570 - kind: conda name: xz version: 5.2.6 @@ -678,18 +567,3 
@@ packages: license: LGPL-2.1 and GPL-2.0 size: 418368 timestamp: 1660346797927 -- kind: conda - name: zipp - version: 3.17.0 - build: pyhd8ed1ab_0 - subdir: noarch - noarch: python - url: https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda - sha256: bced1423fdbf77bca0a735187d05d9b9812d2163f60ab426fc10f11f92ecbe26 - md5: 2e4d6bc0b14e10f895fc6791a7d9b26a - depends: - - python >=3.8 - license: MIT - license_family: MIT - size: 18954 - timestamp: 1695255262261 diff --git a/infra/scripts/pixi/pixi.toml b/infra/scripts/pixi/pixi.toml index 80a29d3a59..f0d360fff3 100644 --- a/infra/scripts/pixi/pixi.toml +++ b/infra/scripts/pixi/pixi.toml @@ -6,7 +6,7 @@ platforms = ["linux-64"] [tasks] [dependencies] -pip-tools = ">=7.4.1,<7.5" +uv = ">=0.1.39,<0.2" [feature.py39.dependencies] python = "~=3.9.0" @@ -14,6 +14,10 @@ python = "~=3.9.0" [feature.py310.dependencies] python = "~=3.10.0" +[feature.py311.dependencies] +python = "~=3.11.0" + [environments] py39 = ["py39"] -py310 = ["py310"] \ No newline at end of file +py310 = ["py310"] +py311 = ["py311"] diff --git a/protos/feast/core/DatastoreTable.proto b/protos/feast/core/DatastoreTable.proto index 4246a6ae6e..acd3ba57b5 100644 --- a/protos/feast/core/DatastoreTable.proto +++ b/protos/feast/core/DatastoreTable.proto @@ -36,4 +36,7 @@ message DatastoreTable { // Datastore namespace google.protobuf.StringValue namespace = 4; + + // Firestore database + google.protobuf.StringValue database = 5; } \ No newline at end of file diff --git a/sdk/python/feast/feature_server.py b/sdk/python/feast/feature_server.py index fda8745c2d..98a8c0caf4 100644 --- a/sdk/python/feast/feature_server.py +++ b/sdk/python/feast/feature_server.py @@ -3,6 +3,7 @@ import threading import traceback import warnings +from contextlib import asynccontextmanager from typing import List, Optional import pandas as pd @@ -50,15 +51,16 @@ def get_app( registry_ttl_sec: int = DEFAULT_FEATURE_SERVER_REGISTRY_TTL, ): proto_json.patch() - - app = FastAPI() # Asynchronously refresh registry, notifying shutdown and canceling the active timer if the app is shutting down registry_proto = None shutting_down = False active_timer: Optional[threading.Timer] = None - async def get_body(request: Request): - return await request.body() + def stop_refresh(): + nonlocal shutting_down + shutting_down = True + if active_timer: + active_timer.cancel() def async_refresh(): store.refresh_registry() @@ -70,14 +72,16 @@ def async_refresh(): active_timer = threading.Timer(registry_ttl_sec, async_refresh) active_timer.start() - @app.on_event("shutdown") - def shutdown_event(): - nonlocal shutting_down - shutting_down = True - if active_timer: - active_timer.cancel() + @asynccontextmanager + async def lifespan(app: FastAPI): + async_refresh() + yield + stop_refresh() - async_refresh() + app = FastAPI(lifespan=lifespan) + + async def get_body(request: Request): + return await request.body() @app.post("/get-online-features") def get_online_features(body=Depends(get_body)): diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index bc492e4208..2fe885865d 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1550,6 +1550,54 @@ def get_online_features( native_entity_values=True, ) + @log_exceptions_and_usage + async def get_online_features_async( + self, + features: Union[List[str], FeatureService], + entity_rows: List[Dict[str, Any]], + full_feature_names: bool = False, + ) -> OnlineResponse: + """ + [Alpha] Retrieves the 
latest online feature data asynchronously. + + Note: This method will download the full feature registry the first time it is run. If you are using a + remote registry like GCS or S3 then that may take a few seconds. The registry remains cached up to a TTL + duration (which can be set to infinity). If the cached registry is stale (more time than the TTL has + passed), then a new registry will be downloaded synchronously by this method. This download may + introduce latency to online feature retrieval. In order to avoid synchronous downloads, please call + refresh_registry() prior to the TTL being reached. Remember it is possible to set the cache TTL to + infinity (cache forever). + + Args: + features: The list of features that should be retrieved from the online store. These features can be + specified either as a list of string feature references or as a feature service. String feature + references must have format "feature_view:feature", e.g. "customer_fv:daily_transactions". + entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair. + full_feature_names: If True, feature names will be prefixed with the corresponding feature view name, + changing them from the format "feature" to "feature_view__feature" (e.g. "daily_transactions" + changes to "customer_fv__daily_transactions"). + + Returns: + OnlineResponse containing the feature data in records. + + Raises: + Exception: No entity with the specified name exists. + """ + columnar: Dict[str, List[Any]] = {k: [] for k in entity_rows[0].keys()} + for entity_row in entity_rows: + for key, value in entity_row.items(): + try: + columnar[key].append(value) + except KeyError as e: + raise ValueError("All entity_rows must have the same keys.") from e + + return await self._get_online_features_async( + features=features, + entity_values=columnar, + full_feature_names=full_feature_names, + native_entity_values=True, + ) + def _get_online_request_context( self, features: Union[List[str], FeatureService], full_feature_names: bool ): @@ -1609,7 +1657,7 @@ def _get_online_request_context( entityless_case, ) - def _get_online_features( + def _prepare_entities_to_read_from_online_store( self, features: Union[List[str], FeatureService], entity_values: Mapping[ @@ -1619,7 +1667,7 @@ def _get_online_features( native_entity_values: bool = True, ): ( - _feature_refs, + feature_refs, requested_on_demand_feature_views, entity_name_to_join_key_map, entity_type_map, @@ -1694,6 +1742,40 @@ def _get_online_features( [DUMMY_ENTITY_VAL] * num_rows, DUMMY_ENTITY.value_type ) + return ( + join_key_values, + grouped_refs, + entity_name_to_join_key_map, + requested_on_demand_feature_views, + feature_refs, + requested_result_row_names, + online_features_response, + ) + + def _get_online_features( + self, + features: Union[List[str], FeatureService], + entity_values: Mapping[ + str, Union[Sequence[Any], Sequence[Value], RepeatedValue] + ], + full_feature_names: bool = False, + native_entity_values: bool = True, + ): + ( + join_key_values, + grouped_refs, + entity_name_to_join_key_map, + requested_on_demand_feature_views, + feature_refs, + requested_result_row_names, + online_features_response, + ) = self._prepare_entities_to_read_from_online_store( + features=features, + entity_values=entity_values, + full_feature_names=full_feature_names, + native_entity_values=native_entity_values, + ) + provider = self._get_provider() for table, requested_features in grouped_refs: # Get the correct set of entity values with the correct join keys. 
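For context, here is a minimal usage sketch of the new `get_online_features_async` API introduced above (not part of the diff; the feature reference and entity rows are hypothetical, and the configured online store must support async reads via `online_read_async`):

```python
# Sketch: calling the new async online retrieval API.
# Feature and entity names are hypothetical; the configured online store
# must implement async reads for this path to work.
import asyncio

from feast import FeatureStore

store = FeatureStore(repo_path=".")


async def fetch():
    response = await store.get_online_features_async(
        features=["driver_hourly_stats:conv_rate"],
        entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}],
    )
    return response.to_dict()


print(asyncio.run(fetch()))
```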
@@ -1724,7 +1806,71 @@ def _get_online_features( if requested_on_demand_feature_views: self._augment_response_with_on_demand_transforms( online_features_response, - _feature_refs, + feature_refs, + requested_on_demand_feature_views, + full_feature_names, + ) + + self._drop_unneeded_columns( + online_features_response, requested_result_row_names + ) + return OnlineResponse(online_features_response) + + async def _get_online_features_async( + self, + features: Union[List[str], FeatureService], + entity_values: Mapping[ + str, Union[Sequence[Any], Sequence[Value], RepeatedValue] + ], + full_feature_names: bool = False, + native_entity_values: bool = True, + ): + ( + join_key_values, + grouped_refs, + entity_name_to_join_key_map, + requested_on_demand_feature_views, + feature_refs, + requested_result_row_names, + online_features_response, + ) = self._prepare_entities_to_read_from_online_store( + features=features, + entity_values=entity_values, + full_feature_names=full_feature_names, + native_entity_values=native_entity_values, + ) + + provider = self._get_provider() + for table, requested_features in grouped_refs: + # Get the correct set of entity values with the correct join keys. + table_entity_values, idxs = self._get_unique_entities( + table, + join_key_values, + entity_name_to_join_key_map, + ) + + # Fetch feature data for the minimum set of Entities. + feature_data = await self._read_from_online_store_async( + table_entity_values, + provider, + requested_features, + table, + ) + + # Populate the result_rows with the Features from the OnlineStore inplace. + self._populate_response_from_feature_data( + feature_data, + idxs, + online_features_response, + full_feature_names, + requested_features, + table, + ) + + if requested_on_demand_feature_views: + self._augment_response_with_on_demand_transforms( + online_features_response, + feature_refs, requested_on_demand_feature_views, full_feature_names, ) @@ -1740,6 +1886,7 @@ def retrieve_online_documents( feature: str, query: Union[str, List[float]], top_k: int, + distance_metric: Optional[str] = None, ) -> OnlineResponse: """ Retrieves the top k closest document features. Note, embeddings are a subset of features. @@ -1750,11 +1897,13 @@ def retrieve_online_documents( references must have format "feature_view:feature", e.g, "document_fv:document_embeddings". query: The query to retrieve the closest document features for. top_k: The number of closest document features to retrieve. + distance_metric: The distance metric to use for retrieval. """ return self._retrieve_online_documents( feature=feature, query=query, top_k=top_k, + distance_metric=distance_metric, ) def _retrieve_online_documents( @@ -1762,6 +1911,7 @@ def _retrieve_online_documents( feature: str, query: Union[str, List[float]], top_k: int, + distance_metric: Optional[str] = None, ): if isinstance(query, str): raise ValueError( @@ -1783,6 +1933,7 @@ def _retrieve_online_documents( requested_feature, query, top_k, + distance_metric, ) # TODO Refactor to better way of populating result @@ -1960,38 +2111,24 @@ def _get_unique_entities( ) return unique_entities, indexes - def _read_from_online_store( + def _get_entity_key_protos( self, entity_rows: Iterable[Mapping[str, Value]], - provider: Provider, - requested_features: List[str], - table: FeatureView, - ) -> List[Tuple[List[Timestamp], List["FieldStatus.ValueType"], List[Value]]]: - """Read and process data from the OnlineStore for a given FeatureView. 
- - This method guarantees that the order of the data in each element of the - List returned is the same as the order of `requested_features`. - - This method assumes that `provider.online_read` returns data for each - combination of Entities in `entity_rows` in the same order as they - are provided. - """ + ) -> List[EntityKeyProto]: # Instantiate one EntityKeyProto per Entity. entity_key_protos = [ EntityKeyProto(join_keys=row.keys(), entity_values=row.values()) for row in entity_rows ] + return entity_key_protos - # Fetch data for Entities. - read_rows = provider.online_read( - config=self.config, - table=table, - entity_keys=entity_key_protos, - requested_features=requested_features, - ) - - # Each row is a set of features for a given entity key. We only need to convert - # the data to Protobuf once. + def _convert_rows_to_protobuf( + self, + requested_features: List[str], + read_rows: List[Tuple[Optional[datetime], Optional[Dict[str, Value]]]], + ) -> List[Tuple[List[Timestamp], List["FieldStatus.ValueType"], List[Value]]]: + # Each row is a set of features for a given entity key. + # We only need to convert the data to Protobuf once. null_value = Value() read_row_protos = [] for read_row in read_rows: @@ -2018,6 +2155,53 @@ def _read_from_online_store( read_row_protos.append((event_timestamps, statuses, values)) return read_row_protos + def _read_from_online_store( + self, + entity_rows: Iterable[Mapping[str, Value]], + provider: Provider, + requested_features: List[str], + table: FeatureView, + ) -> List[Tuple[List[Timestamp], List["FieldStatus.ValueType"], List[Value]]]: + """Read and process data from the OnlineStore for a given FeatureView. + + This method guarantees that the order of the data in each element of the + List returned is the same as the order of `requested_features`. + + This method assumes that `provider.online_read` returns data for each + combination of Entities in `entity_rows` in the same order as they + are provided. + """ + entity_key_protos = self._get_entity_key_protos(entity_rows) + + # Fetch data for Entities. + read_rows = provider.online_read( + config=self.config, + table=table, + entity_keys=entity_key_protos, + requested_features=requested_features, + ) + + return self._convert_rows_to_protobuf(requested_features, read_rows) + + async def _read_from_online_store_async( + self, + entity_rows: Iterable[Mapping[str, Value]], + provider: Provider, + requested_features: List[str], + table: FeatureView, + ) -> List[Tuple[List[Timestamp], List["FieldStatus.ValueType"], List[Value]]]: + entity_key_protos = self._get_entity_key_protos(entity_rows) + + # Fetch data for Entities. + read_rows = await provider.online_read_async( + config=self.config, + table=table, + entity_keys=entity_key_protos, + requested_features=requested_features, + ) + + return self._convert_rows_to_protobuf(requested_features, read_rows) + def _retrieve_from_online_store( self, provider: Provider, @@ -2025,6 +2209,7 @@ def _retrieve_from_online_store( requested_feature: str, query: List[float], top_k: int, + distance_metric: Optional[str], ) -> List[Tuple[Timestamp, "FieldStatus.ValueType", Value, Value, Value]]: """ Search and return document features from the online document store. 
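The new distance_metric argument is threaded from retrieve_online_documents through _retrieve_online_documents and the provider down to the online store. A hedged sketch of a caller using it: the feature reference comes from the docstring above, and "cosine" is one of the metrics the pgvector-backed Postgres store accepts later in this diff ("cosine", "L1", "L2", "inner_product"); other online stores may support a different set or ignore the argument.

from feast import FeatureStore

store = FeatureStore(repo_path=".")

# Retrieve the top_k documents closest to the query embedding under the chosen metric.
# The query must be a list of floats; string queries are rejected by _retrieve_online_documents.
response = store.retrieve_online_documents(
    feature="document_fv:document_embeddings",
    query=[0.1, 0.2, 0.3],  # illustrative query embedding
    top_k=5,
    distance_metric="cosine",
)
print(response.to_dict())
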
@@ -2035,6 +2220,7 @@ def _retrieve_from_online_store( requested_feature=requested_feature, query=query, top_k=top_k, + distance_metric=distance_metric, ) read_row_protos = [] diff --git a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py index f01144afcc..f95a750fd1 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py @@ -31,7 +31,7 @@ def __init__(self, project_name: str, *args, **kwargs): data_source = os.getenv("ATHENA_DATA_SOURCE", "AwsDataCatalog") database = os.getenv("ATHENA_DATABASE", "default") workgroup = os.getenv("ATHENA_WORKGROUP", "primary") - bucket_name = os.getenv("ATHENA_S3_BUCKET_NAME", "feast-integration-tests") + bucket_name = os.getenv("ATHENA_S3_BUCKET_NAME", "feast-int-bucket") self.client = aws_utils.get_athena_data_client(region) self.s3 = aws_utils.get_s3_resource(region) diff --git a/sdk/python/feast/infra/offline_stores/duckdb.py b/sdk/python/feast/infra/offline_stores/duckdb.py index d43286f371..8e392425ea 100644 --- a/sdk/python/feast/infra/offline_stores/duckdb.py +++ b/sdk/python/feast/infra/offline_stores/duckdb.py @@ -1,8 +1,91 @@ +import os +from datetime import datetime +from pathlib import Path +from typing import Any, Callable, List, Optional, Union + import ibis +import pandas as pd +import pyarrow +from ibis.expr.types import Table from pydantic import StrictStr -from feast.infra.offline_stores.ibis import IbisOfflineStore -from feast.repo_config import FeastConfigBaseModel +from feast.data_format import DeltaFormat, ParquetFormat +from feast.data_source import DataSource +from feast.errors import SavedDatasetLocationAlreadyExists +from feast.feature_logging import LoggingConfig, LoggingSource +from feast.feature_view import FeatureView +from feast.infra.offline_stores.file_source import FileSource +from feast.infra.offline_stores.ibis import ( + get_historical_features_ibis, + offline_write_batch_ibis, + pull_all_from_table_or_query_ibis, + pull_latest_from_table_or_query_ibis, + write_logged_features_ibis, +) +from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob +from feast.infra.registry.base_registry import BaseRegistry +from feast.repo_config import FeastConfigBaseModel, RepoConfig + + +def _read_data_source(data_source: DataSource) -> Table: + assert isinstance(data_source, FileSource) + + if isinstance(data_source.file_format, ParquetFormat): + return ibis.read_parquet(data_source.path) + elif isinstance(data_source.file_format, DeltaFormat): + return ibis.read_delta(data_source.path) + + +def _write_data_source( + table: Table, + data_source: DataSource, + mode: str = "append", + allow_overwrite: bool = False, +): + assert isinstance(data_source, FileSource) + + file_options = data_source.file_options + + if mode == "overwrite" and not allow_overwrite and os.path.exists(file_options.uri): + raise SavedDatasetLocationAlreadyExists(location=file_options.uri) + + if isinstance(data_source.file_format, ParquetFormat): + if mode == "overwrite": + table = table.to_pyarrow() + filesystem, path = FileSource.create_filesystem_and_path( + file_options.uri, + file_options.s3_endpoint_override, + ) + + if path.endswith(".parquet"): + pyarrow.parquet.write_table(table, where=path, filesystem=filesystem) + else: + # otherwise assume destination is directory + 
pyarrow.parquet.write_to_dataset( + table, root_path=path, filesystem=filesystem + ) + elif mode == "append": + table = table.to_pyarrow() + prev_table = ibis.read_parquet(file_options.uri).to_pyarrow() + if table.schema != prev_table.schema: + table = table.cast(prev_table.schema) + new_table = pyarrow.concat_tables([table, prev_table]) + ibis.memtable(new_table).to_parquet(file_options.uri) + elif isinstance(data_source.file_format, DeltaFormat): + if mode == "append": + from deltalake import DeltaTable + + prev_schema = DeltaTable(file_options.uri).schema().to_pyarrow() + table = table.cast(ibis.Schema.from_pyarrow(prev_schema)) + write_mode = "append" + elif mode == "overwrite": + write_mode = ( + "overwrite" + if allow_overwrite and os.path.exists(file_options.uri) + else "error" + ) + + table.to_delta(file_options.uri, mode=write_mode) class DuckDBOfflineStoreConfig(FeastConfigBaseModel): @@ -10,8 +93,102 @@ class DuckDBOfflineStoreConfig(FeastConfigBaseModel): # """ Offline store type selector""" -class DuckDBOfflineStore(IbisOfflineStore): +class DuckDBOfflineStore(OfflineStore): + @staticmethod + def pull_latest_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + created_timestamp_column: Optional[str], + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + return pull_latest_from_table_or_query_ibis( + config=config, + data_source=data_source, + join_key_columns=join_key_columns, + feature_name_columns=feature_name_columns, + timestamp_field=timestamp_field, + created_timestamp_column=created_timestamp_column, + start_date=start_date, + end_date=end_date, + data_source_reader=_read_data_source, + data_source_writer=_write_data_source, + ) + + @staticmethod + def get_historical_features( + config: RepoConfig, + feature_views: List[FeatureView], + feature_refs: List[str], + entity_df: Union[pd.DataFrame, str], + registry: BaseRegistry, + project: str, + full_feature_names: bool = False, + ) -> RetrievalJob: + return get_historical_features_ibis( + config=config, + feature_views=feature_views, + feature_refs=feature_refs, + entity_df=entity_df, + registry=registry, + project=project, + full_feature_names=full_feature_names, + data_source_reader=_read_data_source, + data_source_writer=_write_data_source, + ) + + @staticmethod + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + return pull_all_from_table_or_query_ibis( + config=config, + data_source=data_source, + join_key_columns=join_key_columns, + feature_name_columns=feature_name_columns, + timestamp_field=timestamp_field, + start_date=start_date, + end_date=end_date, + data_source_reader=_read_data_source, + data_source_writer=_write_data_source, + ) + + @staticmethod + def offline_write_batch( + config: RepoConfig, + feature_view: FeatureView, + table: pyarrow.Table, + progress: Optional[Callable[[int], Any]], + ): + offline_write_batch_ibis( + config=config, + feature_view=feature_view, + table=table, + progress=progress, + data_source_writer=_write_data_source, + ) + @staticmethod - def setup_ibis_backend(): - # there's no need to call setup as duckdb is default ibis backend - ibis.set_backend("duckdb") + def write_logged_features( + config: RepoConfig, + data: Union[pyarrow.Table, Path], + source: LoggingSource, + 
logging_config: LoggingConfig, + registry: BaseRegistry, + ): + write_logged_features_ibis( + config=config, + data=data, + source=source, + logging_config=logging_config, + registry=registry, + ) diff --git a/sdk/python/feast/infra/offline_stores/ibis.py b/sdk/python/feast/infra/offline_stores/ibis.py index de025ca006..b9efb87a36 100644 --- a/sdk/python/feast/infra/offline_stores/ibis.py +++ b/sdk/python/feast/infra/offline_stores/ibis.py @@ -1,4 +1,3 @@ -import os import uuid from datetime import datetime, timedelta from pathlib import Path @@ -13,19 +12,14 @@ from ibis.expr.types import Table from pytz import utc -from feast.data_format import DeltaFormat, ParquetFormat from feast.data_source import DataSource -from feast.errors import SavedDatasetLocationAlreadyExists from feast.feature_logging import LoggingConfig, LoggingSource from feast.feature_view import FeatureView from feast.infra.offline_stores import offline_utils from feast.infra.offline_stores.file_source import ( FileLoggingDestination, - FileSource, - SavedDatasetFileStorage, ) from feast.infra.offline_stores.offline_store import ( - OfflineStore, RetrievalJob, RetrievalMetadata, ) @@ -42,348 +36,300 @@ def _get_entity_schema(entity_df: pd.DataFrame) -> Dict[str, np.dtype]: return dict(zip(entity_df.columns, entity_df.dtypes)) -class IbisOfflineStore(OfflineStore): - @staticmethod - def pull_latest_from_table_or_query( - config: RepoConfig, - data_source: DataSource, - join_key_columns: List[str], - feature_name_columns: List[str], - timestamp_field: str, - created_timestamp_column: Optional[str], - start_date: datetime, - end_date: datetime, - ) -> RetrievalJob: - raise NotImplementedError() - - def _get_entity_df_event_timestamp_range( - entity_df: pd.DataFrame, entity_df_event_timestamp_col: str - ) -> Tuple[datetime, datetime]: - entity_df_event_timestamp = entity_df.loc[ - :, entity_df_event_timestamp_col - ].infer_objects() - if pd.api.types.is_string_dtype(entity_df_event_timestamp): - entity_df_event_timestamp = pd.to_datetime( - entity_df_event_timestamp, utc=True - ) - entity_df_event_timestamp_range = ( - entity_df_event_timestamp.min().to_pydatetime(), - entity_df_event_timestamp.max().to_pydatetime(), +def pull_latest_from_table_or_query_ibis( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + created_timestamp_column: Optional[str], + start_date: datetime, + end_date: datetime, + data_source_reader: Callable[[DataSource], Table], + data_source_writer: Callable[[pyarrow.Table, DataSource], None], +) -> RetrievalJob: + fields = join_key_columns + feature_name_columns + [timestamp_field] + if created_timestamp_column: + fields.append(created_timestamp_column) + start_date = start_date.astimezone(tz=utc) + end_date = end_date.astimezone(tz=utc) + + table = data_source_reader(data_source) + + table = table.select(*fields) + + # TODO get rid of this fix + if "__log_date" in table.columns: + table = table.drop("__log_date") + + table = table.filter( + ibis.and_( + table[timestamp_field] >= ibis.literal(start_date), + table[timestamp_field] <= ibis.literal(end_date), ) + ) + + table = deduplicate( + table=table, + group_by_cols=join_key_columns, + event_timestamp_col=timestamp_field, + created_timestamp_col=created_timestamp_column, + ) + + return IbisRetrievalJob( + table=table, + on_demand_feature_views=[], + full_feature_names=False, + metadata=None, + data_source_writer=data_source_writer, + ) + + +def 
_get_entity_df_event_timestamp_range( + entity_df: pd.DataFrame, entity_df_event_timestamp_col: str +) -> Tuple[datetime, datetime]: + entity_df_event_timestamp = entity_df.loc[ + :, entity_df_event_timestamp_col + ].infer_objects() + if pd.api.types.is_string_dtype(entity_df_event_timestamp): + entity_df_event_timestamp = pd.to_datetime(entity_df_event_timestamp, utc=True) + entity_df_event_timestamp_range = ( + entity_df_event_timestamp.min().to_pydatetime(), + entity_df_event_timestamp.max().to_pydatetime(), + ) + + return entity_df_event_timestamp_range + + +def _to_utc(entity_df: pd.DataFrame, event_timestamp_col): + entity_df_event_timestamp = entity_df.loc[:, event_timestamp_col].infer_objects() + if pd.api.types.is_string_dtype(entity_df_event_timestamp): + entity_df_event_timestamp = pd.to_datetime(entity_df_event_timestamp, utc=True) + + entity_df[event_timestamp_col] = entity_df_event_timestamp + return entity_df + + +def _generate_row_id( + entity_table: Table, feature_views: List[FeatureView], event_timestamp_col +) -> Table: + all_entities = [event_timestamp_col] + for fv in feature_views: + if fv.projection.join_key_map: + all_entities.extend(fv.projection.join_key_map.values()) + else: + all_entities.extend([e.name for e in fv.entity_columns]) - return entity_df_event_timestamp_range - - @staticmethod - def _to_utc(entity_df: pd.DataFrame, event_timestamp_col): - entity_df_event_timestamp = entity_df.loc[ - :, event_timestamp_col - ].infer_objects() - if pd.api.types.is_string_dtype(entity_df_event_timestamp): - entity_df_event_timestamp = pd.to_datetime( - entity_df_event_timestamp, utc=True - ) - - entity_df[event_timestamp_col] = entity_df_event_timestamp - return entity_df - - @staticmethod - def _generate_row_id( - entity_table: Table, feature_views: List[FeatureView], event_timestamp_col - ) -> Table: - all_entities = [event_timestamp_col] - for fv in feature_views: - if fv.projection.join_key_map: - all_entities.extend(fv.projection.join_key_map.values()) - else: - all_entities.extend([e.name for e in fv.entity_columns]) - - r = ibis.literal("") - - for e in set(all_entities): - r = r.concat(entity_table[e].cast("string")) # type: ignore - - entity_table = entity_table.mutate(entity_row_id=r) - - return entity_table - - @staticmethod - def _read_data_source(data_source: DataSource) -> Table: - assert isinstance(data_source, FileSource) - - if isinstance(data_source.file_format, ParquetFormat): - return ibis.read_parquet(data_source.path) - elif isinstance(data_source.file_format, DeltaFormat): - return ibis.read_delta(data_source.path) - - @staticmethod - def get_historical_features( - config: RepoConfig, - feature_views: List[FeatureView], - feature_refs: List[str], - entity_df: Union[pd.DataFrame, str], - registry: BaseRegistry, - project: str, - full_feature_names: bool = False, - ) -> RetrievalJob: - entity_schema = _get_entity_schema( - entity_df=entity_df, - ) - event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( - entity_schema=entity_schema, - ) + r = ibis.literal("") - # TODO get range with ibis - timestamp_range = IbisOfflineStore._get_entity_df_event_timestamp_range( - entity_df, event_timestamp_col - ) + for e in set(all_entities): + r = r.concat(entity_table[e].cast("string")) # type: ignore - entity_df = IbisOfflineStore._to_utc(entity_df, event_timestamp_col) + entity_table = entity_table.mutate(entity_row_id=r) - entity_table = ibis.memtable(entity_df) - entity_table = IbisOfflineStore._generate_row_id( - entity_table, 
feature_views, event_timestamp_col + return entity_table + + +def get_historical_features_ibis( + config: RepoConfig, + feature_views: List[FeatureView], + feature_refs: List[str], + entity_df: Union[pd.DataFrame, str], + registry: BaseRegistry, + project: str, + data_source_reader: Callable[[DataSource], Table], + data_source_writer: Callable[[pyarrow.Table, DataSource], None], + full_feature_names: bool = False, +) -> RetrievalJob: + entity_schema = _get_entity_schema( + entity_df=entity_df, + ) + event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( + entity_schema=entity_schema, + ) + + # TODO get range with ibis + timestamp_range = _get_entity_df_event_timestamp_range( + entity_df, event_timestamp_col + ) + + entity_df = _to_utc(entity_df, event_timestamp_col) + + entity_table = ibis.memtable(entity_df) + entity_table = _generate_row_id(entity_table, feature_views, event_timestamp_col) + + def read_fv( + feature_view: FeatureView, feature_refs: List[str], full_feature_names: bool + ) -> Tuple: + fv_table: Table = data_source_reader(feature_view.batch_source) + + for old_name, new_name in feature_view.batch_source.field_mapping.items(): + if old_name in fv_table.columns: + fv_table = fv_table.rename({new_name: old_name}) + + timestamp_field = feature_view.batch_source.timestamp_field + + # TODO mutate only if tz-naive + fv_table = fv_table.mutate( + **{ + timestamp_field: fv_table[timestamp_field].cast( + dt.Timestamp(timezone="UTC") + ) + } ) - def read_fv( - feature_view: FeatureView, feature_refs: List[str], full_feature_names: bool - ) -> Tuple: - fv_table: Table = IbisOfflineStore._read_data_source( - feature_view.batch_source - ) - - for old_name, new_name in feature_view.batch_source.field_mapping.items(): - if old_name in fv_table.columns: - fv_table = fv_table.rename({new_name: old_name}) + full_name_prefix = feature_view.projection.name_alias or feature_view.name - timestamp_field = feature_view.batch_source.timestamp_field + feature_refs = [ + fr.split(":")[1] + for fr in feature_refs + if fr.startswith(f"{full_name_prefix}:") + ] - # TODO mutate only if tz-naive - fv_table = fv_table.mutate( - **{ - timestamp_field: fv_table[timestamp_field].cast( - dt.Timestamp(timezone="UTC") - ) - } + if full_feature_names: + fv_table = fv_table.rename( + {f"{full_name_prefix}__{feature}": feature for feature in feature_refs} ) - full_name_prefix = feature_view.projection.name_alias or feature_view.name - feature_refs = [ - fr.split(":")[1] - for fr in feature_refs - if fr.startswith(f"{full_name_prefix}:") + f"{full_name_prefix}__{feature}" for feature in feature_refs ] - if full_feature_names: - fv_table = fv_table.rename( - { - f"{full_name_prefix}__{feature}": feature - for feature in feature_refs - } - ) - - feature_refs = [ - f"{full_name_prefix}__{feature}" for feature in feature_refs - ] - - return ( - fv_table, - feature_view.batch_source.timestamp_field, - feature_view.batch_source.created_timestamp_column, - feature_view.projection.join_key_map - or {e.name: e.name for e in feature_view.entity_columns}, - feature_refs, - feature_view.ttl, - ) - - res = point_in_time_join( - entity_table=entity_table, - feature_tables=[ - read_fv(feature_view, feature_refs, full_feature_names) - for feature_view in feature_views - ], - event_timestamp_col=event_timestamp_col, - ) - - odfvs = OnDemandFeatureView.get_requested_odfvs(feature_refs, project, registry) - - substrait_odfvs = [fv for fv in odfvs if fv.mode == "substrait"] - for odfv in substrait_odfvs: - res = 
odfv.transform_ibis(res, full_feature_names) - - return IbisRetrievalJob( - res, - [fv for fv in odfvs if fv.mode != "substrait"], - full_feature_names, - metadata=RetrievalMetadata( - features=feature_refs, - keys=list(set(entity_df.columns) - {event_timestamp_col}), - min_event_timestamp=timestamp_range[0], - max_event_timestamp=timestamp_range[1], - ), - ) - - @staticmethod - def pull_all_from_table_or_query( - config: RepoConfig, - data_source: DataSource, - join_key_columns: List[str], - feature_name_columns: List[str], - timestamp_field: str, - start_date: datetime, - end_date: datetime, - ) -> RetrievalJob: - assert isinstance(data_source, FileSource) - - fields = join_key_columns + feature_name_columns + [timestamp_field] - start_date = start_date.astimezone(tz=utc) - end_date = end_date.astimezone(tz=utc) - - table = IbisOfflineStore._read_data_source(data_source) - - table = table.select(*fields) - - # TODO get rid of this fix - if "__log_date" in table.columns: - table = table.drop("__log_date") - - table = table.filter( - ibis.and_( - table[timestamp_field] >= ibis.literal(start_date), - table[timestamp_field] <= ibis.literal(end_date), - ) - ) - - return IbisRetrievalJob( - table=table, - on_demand_feature_views=[], - full_feature_names=False, - metadata=None, - ) - - @staticmethod - def write_logged_features( - config: RepoConfig, - data: Union[pyarrow.Table, Path], - source: LoggingSource, - logging_config: LoggingConfig, - registry: BaseRegistry, - ): - destination = logging_config.destination - assert isinstance(destination, FileLoggingDestination) - - table = ( - ibis.read_parquet(data) if isinstance(data, Path) else ibis.memtable(data) + return ( + fv_table, + feature_view.batch_source.timestamp_field, + feature_view.batch_source.created_timestamp_column, + feature_view.projection.join_key_map + or {e.name: e.name for e in feature_view.entity_columns}, + feature_refs, + feature_view.ttl, ) - if destination.partition_by: - kwargs = {"partition_by": destination.partition_by} - else: - kwargs = {} - - # TODO always write to directory - table.to_parquet( - f"{destination.path}/{uuid.uuid4().hex}-{{i}}.parquet", **kwargs + res = point_in_time_join( + entity_table=entity_table, + feature_tables=[ + read_fv(feature_view, feature_refs, full_feature_names) + for feature_view in feature_views + ], + event_timestamp_col=event_timestamp_col, + ) + + odfvs = OnDemandFeatureView.get_requested_odfvs(feature_refs, project, registry) + + substrait_odfvs = [fv for fv in odfvs if fv.mode == "substrait"] + for odfv in substrait_odfvs: + res = odfv.transform_ibis(res, full_feature_names) + + return IbisRetrievalJob( + res, + [fv for fv in odfvs if fv.mode != "substrait"], + full_feature_names, + metadata=RetrievalMetadata( + features=feature_refs, + keys=list(set(entity_df.columns) - {event_timestamp_col}), + min_event_timestamp=timestamp_range[0], + max_event_timestamp=timestamp_range[1], + ), + data_source_writer=data_source_writer, + ) + + +def pull_all_from_table_or_query_ibis( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + start_date: datetime, + end_date: datetime, + data_source_reader: Callable[[DataSource], Table], + data_source_writer: Callable[[pyarrow.Table, DataSource], None], +) -> RetrievalJob: + fields = join_key_columns + feature_name_columns + [timestamp_field] + start_date = start_date.astimezone(tz=utc) + end_date = end_date.astimezone(tz=utc) + + table = 
data_source_reader(data_source) + + table = table.select(*fields) + + # TODO get rid of this fix + if "__log_date" in table.columns: + table = table.drop("__log_date") + + table = table.filter( + ibis.and_( + table[timestamp_field] >= ibis.literal(start_date), + table[timestamp_field] <= ibis.literal(end_date), ) + ) + + return IbisRetrievalJob( + table=table, + on_demand_feature_views=[], + full_feature_names=False, + metadata=None, + data_source_writer=data_source_writer, + ) + + +def write_logged_features_ibis( + config: RepoConfig, + data: Union[pyarrow.Table, Path], + source: LoggingSource, + logging_config: LoggingConfig, + registry: BaseRegistry, +): + destination = logging_config.destination + assert isinstance(destination, FileLoggingDestination) - @staticmethod - def offline_write_batch( - config: RepoConfig, - feature_view: FeatureView, - table: pyarrow.Table, - progress: Optional[Callable[[int], Any]], - ): - assert isinstance(feature_view.batch_source, FileSource) - - pa_schema, column_names = get_pyarrow_schema_from_batch_source( - config, feature_view.batch_source - ) - if column_names != table.column_names: - raise ValueError( - f"The input pyarrow table has schema {table.schema} with the incorrect columns {table.column_names}. " - f"The schema is expected to be {pa_schema} with the columns (in this exact order) to be {column_names}." - ) - - file_options = feature_view.batch_source.file_options - - if isinstance(feature_view.batch_source.file_format, ParquetFormat): - prev_table = ibis.read_parquet(file_options.uri).to_pyarrow() - if table.schema != prev_table.schema: - table = table.cast(prev_table.schema) - new_table = pyarrow.concat_tables([table, prev_table]) + table = ibis.read_parquet(data) if isinstance(data, Path) else ibis.memtable(data) - ibis.memtable(new_table).to_parquet(file_options.uri) - elif isinstance(feature_view.batch_source.file_format, DeltaFormat): - from deltalake import DeltaTable + if destination.partition_by: + kwargs = {"partition_by": destination.partition_by} + else: + kwargs = {} - prev_schema = DeltaTable(file_options.uri).schema().to_pyarrow() - if table.schema != prev_schema: - table = table.cast(prev_schema) - ibis.memtable(table).to_delta(file_options.uri, mode="append") + # TODO always write to directory + table.to_parquet(f"{destination.path}/{uuid.uuid4().hex}-{{i}}.parquet", **kwargs) -class IbisRetrievalJob(RetrievalJob): - def __init__( - self, table, on_demand_feature_views, full_feature_names, metadata - ) -> None: - super().__init__() - self.table = table - self._on_demand_feature_views: List[OnDemandFeatureView] = ( - on_demand_feature_views +def offline_write_batch_ibis( + config: RepoConfig, + feature_view: FeatureView, + table: pyarrow.Table, + progress: Optional[Callable[[int], Any]], + data_source_writer: Callable[[pyarrow.Table, DataSource], None], +): + pa_schema, column_names = get_pyarrow_schema_from_batch_source( + config, feature_view.batch_source + ) + if column_names != table.column_names: + raise ValueError( + f"The input pyarrow table has schema {table.schema} with the incorrect columns {table.column_names}. " + f"The schema is expected to be {pa_schema} with the columns (in this exact order) to be {column_names}." 
) - self._full_feature_names = full_feature_names - self._metadata = metadata - def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: - return self.table.execute() - - def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: - return self.table.to_pyarrow() + data_source_writer(ibis.memtable(table), feature_view.batch_source) - @property - def full_feature_names(self) -> bool: - return self._full_feature_names - @property - def on_demand_feature_views(self) -> List[OnDemandFeatureView]: - return self._on_demand_feature_views - - def persist( - self, - storage: SavedDatasetStorage, - allow_overwrite: bool = False, - timeout: Optional[int] = None, - ): - assert isinstance(storage, SavedDatasetFileStorage) - if not allow_overwrite and os.path.exists(storage.file_options.uri): - raise SavedDatasetLocationAlreadyExists(location=storage.file_options.uri) - - if isinstance(storage.file_options.file_format, ParquetFormat): - filesystem, path = FileSource.create_filesystem_and_path( - storage.file_options.uri, - storage.file_options.s3_endpoint_override, - ) +def deduplicate( + table: Table, + group_by_cols: List[str], + event_timestamp_col: str, + created_timestamp_col: Optional[str], +): + order_by_fields = [ibis.desc(table[event_timestamp_col])] + if created_timestamp_col: + order_by_fields.append(ibis.desc(table[created_timestamp_col])) - if path.endswith(".parquet"): - pyarrow.parquet.write_table( - self.to_arrow(), where=path, filesystem=filesystem - ) - else: - # otherwise assume destination is directory - pyarrow.parquet.write_to_dataset( - self.to_arrow(), root_path=path, filesystem=filesystem - ) - elif isinstance(storage.file_options.file_format, DeltaFormat): - mode = ( - "overwrite" - if allow_overwrite and os.path.exists(storage.file_options.uri) - else "error" - ) - self.table.to_delta(storage.file_options.uri, mode=mode) + table = ( + table.group_by(by=group_by_cols) + .order_by(order_by_fields) + .mutate(rn=ibis.row_number()) + ) - @property - def metadata(self) -> Optional[RetrievalMetadata]: - return self._metadata + return table.filter(table["rn"] == ibis.literal(0)).drop("rn") def point_in_time_join( @@ -440,20 +386,13 @@ def point_in_time_join( feature_table = feature_table.drop(s.endswith("_y")) - order_by_fields = [ibis.desc(feature_table[timestamp_field])] - if created_timestamp_field: - order_by_fields.append(ibis.desc(feature_table[created_timestamp_field])) - - feature_table = ( - feature_table.group_by(by="entity_row_id") - .order_by(order_by_fields) - .mutate(rn=ibis.row_number()) + feature_table = deduplicate( + table=feature_table, + group_by_cols=["entity_row_id"], + event_timestamp_col=timestamp_field, + created_timestamp_col=created_timestamp_field, ) - feature_table = feature_table.filter( - feature_table["rn"] == ibis.literal(0) - ).drop("rn") - select_cols = ["entity_row_id"] select_cols.extend(feature_refs) feature_table = feature_table.select(select_cols) @@ -470,3 +409,50 @@ def point_in_time_join( acc_table = acc_table.drop("entity_row_id") return acc_table + + +class IbisRetrievalJob(RetrievalJob): + def __init__( + self, + table, + on_demand_feature_views, + full_feature_names, + metadata, + data_source_writer, + ) -> None: + super().__init__() + self.table = table + self._on_demand_feature_views: List[OnDemandFeatureView] = ( + on_demand_feature_views + ) + self._full_feature_names = full_feature_names + self._metadata = metadata + self.data_source_writer = data_source_writer + + def _to_df_internal(self, timeout: 
Optional[int] = None) -> pd.DataFrame: + return self.table.execute() + + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: + return self.table.to_pyarrow() + + @property + def full_feature_names(self) -> bool: + return self._full_feature_names + + @property + def on_demand_feature_views(self) -> List[OnDemandFeatureView]: + return self._on_demand_feature_views + + def persist( + self, + storage: SavedDatasetStorage, + allow_overwrite: bool = False, + timeout: Optional[int] = None, + ): + self.data_source_writer( + self.table, storage.to_data_source(), "overwrite", allow_overwrite + ) + + @property + def metadata(self) -> Optional[RetrievalMetadata]: + return self._metadata diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py index 907e4d4483..cc59804467 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake.py +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -139,8 +139,10 @@ def pull_latest_from_table_or_query( assert isinstance(data_source, SnowflakeSource) from_expression = data_source.get_table_query_string() - if not data_source.database and data_source.table: + if not data_source.database and not data_source.schema and data_source.table: from_expression = f'"{config.offline_store.database}"."{config.offline_store.schema_}".{from_expression}' + if not data_source.database and data_source.schema and data_source.table: + from_expression = f'"{config.offline_store.database}".{from_expression}' if join_key_columns: partition_by_join_key_string = '"' + '", "'.join(join_key_columns) + '"' @@ -226,8 +228,10 @@ def pull_all_from_table_or_query( assert isinstance(data_source, SnowflakeSource) from_expression = data_source.get_table_query_string() - if not data_source.database and data_source.table: + if not data_source.database and not data_source.schema and data_source.table: from_expression = f'"{config.offline_store.database}"."{config.offline_store.schema_}".{from_expression}' + if not data_source.database and data_source.schema and data_source.table: + from_expression = f'"{config.offline_store.database}".{from_expression}' field_string = ( '"' diff --git a/sdk/python/feast/infra/online_stores/contrib/elasticsearch.py b/sdk/python/feast/infra/online_stores/contrib/elasticsearch.py new file mode 100644 index 0000000000..429327e651 --- /dev/null +++ b/sdk/python/feast/infra/online_stores/contrib/elasticsearch.py @@ -0,0 +1,276 @@ +from __future__ import absolute_import + +import base64 +import json +import logging +from datetime import datetime +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple + +import pytz +from elasticsearch import Elasticsearch, helpers + +from feast import Entity, FeatureView, RepoConfig +from feast.infra.key_encoding_utils import get_list_val_str, serialize_entity_key +from feast.infra.online_stores.online_store import OnlineStore +from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import Value as ValueProto +from feast.repo_config import FeastConfigBaseModel + + +class ElasticSearchOnlineStoreConfig(FeastConfigBaseModel): + """ + Configuration for the ElasticSearch online store. + NOTE: The class *must* end with the `OnlineStoreConfig` suffix. 
+ """ + + type: str = "elasticsearch" + + host: Optional[str] = None + user: Optional[str] = None + password: Optional[str] = None + port: Optional[int] = None + index: Optional[str] = None + scheme: Optional[str] = "http" + + # The number of rows to write in a single batch + write_batch_size: Optional[int] = 40 + + # The length of the vector value + vector_len: Optional[int] = 512 + + # The vector similarity metric to use in KNN search + # more details: https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html + similarity: Optional[str] = "cosine" + + +class ElasticSearchOnlineStore(OnlineStore): + _client: Optional[Elasticsearch] = None + + def _get_client(self, config: RepoConfig) -> Elasticsearch: + online_store_config = config.online_store + assert isinstance(online_store_config, ElasticSearchOnlineStoreConfig) + + user = online_store_config.user if online_store_config.user is not None else "" + password = ( + online_store_config.password + if online_store_config.password is not None + else "" + ) + + if self._client: + return self._client + else: + self._client = Elasticsearch( + hosts=[ + { + "host": online_store_config.host or "localhost", + "port": online_store_config.port or 9200, + "scheme": online_store_config.scheme or "http", + } + ], + basic_auth=(user, password), + ) + return self._client + + def _bulk_batch_actions(self, table: FeatureView, batch: List[Dict[str, Any]]): + for row in batch: + yield { + "_index": table.name, + "_id": f"{row['entity_key']}_{row['feature_name']}_{row['timestamp']}", + "_source": row, + } + + def online_write_batch( + self, + config: RepoConfig, + table: FeatureView, + data: List[ + Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]] + ], + progress: Optional[Callable[[int], Any]], + ) -> None: + insert_values = [] + for entity_key, values, timestamp, created_ts in data: + entity_key_bin = serialize_entity_key( + entity_key, + entity_key_serialization_version=config.entity_key_serialization_version, + ) + encoded_entity_key = base64.b64encode(entity_key_bin).decode("utf-8") + timestamp = _to_naive_utc(timestamp) + if created_ts is not None: + created_ts = _to_naive_utc(created_ts) + for feature_name, value in values.items(): + encoded_value = base64.b64encode(value.SerializeToString()).decode( + "utf-8" + ) + vector_val = json.loads(get_list_val_str(value)) + insert_values.append( + { + "entity_key": encoded_entity_key, + "feature_name": feature_name, + "feature_value": encoded_value, + "timestamp": timestamp, + "created_ts": created_ts, + "vector_value": vector_val, + } + ) + + batch_size = config.online_store.write_batch_size + for i in range(0, len(insert_values), batch_size): + batch = insert_values[i : i + batch_size] + actions = self._bulk_batch_actions(table, batch) + helpers.bulk(self._get_client(config), actions) + + def online_read( + self, + config: RepoConfig, + table: FeatureView, + entity_keys: List[EntityKeyProto], + requested_features: Optional[List[str]] = None, + ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: + if not requested_features: + body = { + "_source": {"excludes": ["vector_value"]}, + "query": {"match": {"entity_key": entity_keys}}, + } + else: + body = { + "_source": {"excludes": ["vector_value"]}, + "query": { + "bool": { + "must": [ + {"terms": {"entity_key": entity_keys}}, + {"terms": {"feature_name": requested_features}}, + ] + } + }, + } + response = self._get_client(config).search(index=table.name, body=body) + results: 
List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = [] + for hit in response["hits"]["hits"]: + results.append( + ( + hit["_source"]["timestamp"], + {hit["_source"]["feature_name"]: hit["_source"]["feature_value"]}, + ) + ) + return results + + def create_index(self, config: RepoConfig, table: FeatureView): + """ + Create an index in ElasticSearch for the given table. + TODO: This method can be exposed to users to customize the indexing functionality. + Args: + config: Feast repo configuration object. + table: FeatureView table for which the index needs to be created. + """ + index_mapping = { + "properties": { + "entity_key": {"type": "binary"}, + "feature_name": {"type": "keyword"}, + "feature_value": {"type": "binary"}, + "timestamp": {"type": "date"}, + "created_ts": {"type": "date"}, + "vector_value": { + "type": "dense_vector", + "dims": config.online_store.vector_len, + "index": "true", + "similarity": config.online_store.similarity, + }, + } + } + self._get_client(config).indices.create( + index=table.name, mappings=index_mapping + ) + + def update( + self, + config: RepoConfig, + tables_to_delete: Sequence[FeatureView], + tables_to_keep: Sequence[FeatureView], + entities_to_delete: Sequence[Entity], + entities_to_keep: Sequence[Entity], + partial: bool, + ): + # implement the update method + for table in tables_to_delete: + self._get_client(config).delete_by_query(index=table.name) + for table in tables_to_keep: + self.create_index(config, table) + + def teardown( + self, + config: RepoConfig, + tables: Sequence[FeatureView], + entities: Sequence[Entity], + ): + project = config.project + try: + for table in tables: + self._get_client(config).indices.delete(index=table.name) + except Exception as e: + logging.exception(f"Error deleting index in project {project}: {e}") + raise + + def retrieve_online_documents( + self, + config: RepoConfig, + table: FeatureView, + requested_feature: str, + embedding: List[float], + top_k: int, + *args, + **kwargs, + ) -> List[ + Tuple[ + Optional[datetime], + Optional[ValueProto], + Optional[ValueProto], + Optional[ValueProto], + ] + ]: + result: List[ + Tuple[ + Optional[datetime], + Optional[ValueProto], + Optional[ValueProto], + Optional[ValueProto], + ] + ] = [] + response = self._get_client(config).search( + index=table.name, + knn={ + "field": "vector_value", + "query_vector": embedding, + "k": top_k, + }, + ) + rows = response["hits"]["hits"][0:top_k] + for row in rows: + feature_value = row["_source"]["feature_value"] + vector_value = row["_source"]["vector_value"] + timestamp = row["_source"]["timestamp"] + distance = row["_score"] + timestamp = datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%f") + + feature_value_proto = ValueProto() + feature_value_proto.ParseFromString(base64.b64decode(feature_value)) + + vector_value_proto = ValueProto(string_val=str(vector_value)) + distance_value_proto = ValueProto(float_val=distance) + result.append( + ( + timestamp, + feature_value_proto, + vector_value_proto, + distance_value_proto, + ) + ) + return result + + +def _to_naive_utc(ts: datetime): + if ts.tzinfo is None: + return ts + else: + return ts.astimezone(pytz.utc).replace(tzinfo=None) diff --git a/sdk/python/feast/infra/online_stores/contrib/elasticsearch_repo_configuration.py b/sdk/python/feast/infra/online_stores/contrib/elasticsearch_repo_configuration.py new file mode 100644 index 0000000000..4d1f2c3ca1 --- /dev/null +++ b/sdk/python/feast/infra/online_stores/contrib/elasticsearch_repo_configuration.py @@ -0,0 +1,13 @@ 
+from tests.integration.feature_repos.integration_test_repo_config import ( + IntegrationTestRepoConfig, +) +from tests.integration.feature_repos.universal.online_store.elasticsearch import ( + ElasticSearchOnlineStoreCreator, +) + +FULL_REPO_CONFIGS = [ + IntegrationTestRepoConfig( + online_store="elasticsearch", + online_store_creator=ElasticSearchOnlineStoreCreator, + ), +] diff --git a/sdk/python/feast/infra/online_stores/contrib/postgres.py b/sdk/python/feast/infra/online_stores/contrib/postgres.py index 6ed0885d13..1043208ab3 100644 --- a/sdk/python/feast/infra/online_stores/contrib/postgres.py +++ b/sdk/python/feast/infra/online_stores/contrib/postgres.py @@ -21,6 +21,13 @@ from feast.repo_config import RepoConfig from feast.usage import log_exceptions_and_usage +SUPPORTED_DISTANCE_METRICS_DICT = { + "cosine": "<=>", + "L1": "<+>", + "L2": "<->", + "inner_product": "<#>", +} + class PostgreSQLOnlineStoreConfig(PostgreSQLConfig): type: Literal["postgres"] = "postgres" @@ -276,6 +283,7 @@ def retrieve_online_documents( requested_feature: str, embedding: List[float], top_k: int, + distance_metric: Optional[str] = "L2", ) -> List[ Tuple[ Optional[datetime], @@ -292,6 +300,7 @@ def retrieve_online_documents( requested_feature: The requested feature as the column to search embedding: The query embedding to search for top_k: The number of items to return + distance_metric: The distance metric to use for the search.G Returns: List of tuples containing the event timestamp and the document feature @@ -303,6 +312,12 @@ def retrieve_online_documents( "pgvector is not enabled in the online store configuration" ) + if distance_metric not in SUPPORTED_DISTANCE_METRICS_DICT: + raise ValueError( + f"Distance metric {distance_metric} is not supported. Supported distance metrics are {SUPPORTED_DISTANCE_METRICS_DICT.keys()}" + ) + + distance_metric_sql = SUPPORTED_DISTANCE_METRICS_DICT[distance_metric] # Convert the embedding to a string to be used in postgres vector search query_embedding_str = f"[{','.join(str(el) for el in embedding)}]" @@ -327,13 +342,14 @@ def retrieve_online_documents( feature_name, value, vector_value, - vector_value <-> %s as distance, + vector_value {distance_metric_sql} %s as distance, event_ts FROM {table_name} WHERE feature_name = {feature_name} ORDER BY distance LIMIT {top_k}; """ ).format( + distance_metric_sql=distance_metric_sql, table_name=sql.Identifier(table_name), feature_name=sql.Literal(requested_feature), top_k=sql.Literal(top_k), diff --git a/sdk/python/feast/infra/online_stores/datastore.py b/sdk/python/feast/infra/online_stores/datastore.py index 149354b472..bf44a74966 100644 --- a/sdk/python/feast/infra/online_stores/datastore.py +++ b/sdk/python/feast/infra/online_stores/datastore.py @@ -80,6 +80,9 @@ class DatastoreOnlineStoreConfig(FeastConfigBaseModel): namespace: Optional[StrictStr] = None """ (optional) Datastore namespace """ + database: Optional[StrictStr] = None + """ (optional) Firestore database """ + write_concurrency: Optional[PositiveInt] = 40 """ (optional) Amount of threads to use when writing batches of feature rows into Datastore""" @@ -155,7 +158,9 @@ def teardown( def _get_client(self, online_config: DatastoreOnlineStoreConfig): if not self._client: self._client = _initialize_client( - online_config.project_id, online_config.namespace + online_config.project_id, + online_config.namespace, + online_config.database, ) return self._client @@ -344,11 +349,14 @@ def worker(shared_counter): def _initialize_client( - project_id: Optional[str], 
namespace: Optional[str] + project_id: Optional[str], namespace: Optional[str], database: Optional[str] ) -> datastore.Client: try: client = datastore.Client( - project=project_id, namespace=namespace, client_info=get_http_client_info() + project=project_id, + namespace=namespace, + database=database, + client_info=get_http_client_info(), ) return client except DefaultCredentialsError as e: @@ -368,11 +376,13 @@ class DatastoreTable(InfraObject): name: The name of the table. project_id (optional): The GCP project id. namespace (optional): Datastore namespace. + database (optional): Firestore database. """ project: str project_id: Optional[str] namespace: Optional[str] + database: Optional[str] def __init__( self, @@ -380,11 +390,13 @@ def __init__( name: str, project_id: Optional[str] = None, namespace: Optional[str] = None, + database: Optional[str] = None, ): super().__init__(name) self.project = project self.project_id = project_id self.namespace = namespace + self.database = database def to_infra_object_proto(self) -> InfraObjectProto: datastore_table_proto = self.to_proto() @@ -401,6 +413,8 @@ def to_proto(self) -> Any: datastore_table_proto.project_id.value = self.project_id if self.namespace: datastore_table_proto.namespace.value = self.namespace + if self.database: + datastore_table_proto.database.value = self.database return datastore_table_proto @staticmethod @@ -410,7 +424,7 @@ def from_infra_object_proto(infra_object_proto: InfraObjectProto) -> Any: name=infra_object_proto.datastore_table.name, ) - # Distinguish between null and empty string, since project_id and namespace are StringValues. + # Distinguish between null and empty string, since project_id, namespace and database are StringValues. if infra_object_proto.datastore_table.HasField("project_id"): datastore_table.project_id = ( infra_object_proto.datastore_table.project_id.value @@ -419,6 +433,8 @@ def from_infra_object_proto(infra_object_proto: InfraObjectProto) -> Any: datastore_table.namespace = ( infra_object_proto.datastore_table.namespace.value ) + if infra_object_proto.datastore_table.HasField("database"): + datastore_table.database = infra_object_proto.datastore_table.database.value return datastore_table @@ -434,11 +450,13 @@ def from_proto(datastore_table_proto: DatastoreTableProto) -> Any: datastore_table.project_id = datastore_table_proto.project_id.value if datastore_table_proto.HasField("namespace"): datastore_table.namespace = datastore_table_proto.namespace.value + if datastore_table_proto.HasField("database"): + datastore_table.database = datastore_table_proto.database.value return datastore_table def update(self): - client = _initialize_client(self.project_id, self.namespace) + client = _initialize_client(self.project_id, self.namespace, self.database) key = client.key("Project", self.project, "Table", self.name) entity = datastore.Entity( key=key, exclude_from_indexes=("created_ts", "event_ts", "values") @@ -447,7 +465,7 @@ def update(self): client.put(entity) def teardown(self): - client = _initialize_client(self.project_id, self.namespace) + client = _initialize_client(self.project_id, self.namespace, self.database) key = client.key("Project", self.project, "Table", self.name) _delete_all_values(client, key) diff --git a/sdk/python/feast/infra/online_stores/online_store.py b/sdk/python/feast/infra/online_stores/online_store.py index 67c5a931dd..05983a494c 100644 --- a/sdk/python/feast/infra/online_stores/online_store.py +++ b/sdk/python/feast/infra/online_stores/online_store.py @@ -80,6 +80,31 @@ 
def online_read( """ pass + async def online_read_async( + self, + config: RepoConfig, + table: FeatureView, + entity_keys: List[EntityKeyProto], + requested_features: Optional[List[str]] = None, + ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: + """ + Reads features values for the given entity keys asynchronously. + + Args: + config: The config for the current feature store. + table: The feature view whose feature values should be read. + entity_keys: The list of entity keys for which feature values should be read. + requested_features: The list of features that should be read. + + Returns: + A list of the same length as entity_keys. Each item in the list is a tuple where the first + item is the event timestamp for the row, and the second item is a dict mapping feature names + to values, which are returned in proto format. + """ + raise NotImplementedError( + f"Online store {self.__class__.__name__} does not support online read async" + ) + @abstractmethod def update( self, @@ -142,6 +167,7 @@ def retrieve_online_documents( requested_feature: str, embedding: List[float], top_k: int, + distance_metric: Optional[str] = None, ) -> List[ Tuple[ Optional[datetime], @@ -154,11 +180,12 @@ def retrieve_online_documents( Retrieves online feature values for the specified embeddings. Args: + distance_metric: distance metric to use for retrieval. config: The config for the current feature store. table: The feature view whose feature values should be read. requested_feature: The name of the feature whose embeddings should be used for retrieval. embedding: The embeddings to use for retrieval. - top_k: The number of nearest neighbors to retrieve. + top_k: The number of documents to retrieve. Returns: object: A list of top k closest documents to the specified embedding. 
Each item in the list is a tuple diff --git a/sdk/python/feast/infra/online_stores/redis.py b/sdk/python/feast/infra/online_stores/redis.py index 6f6c2fb45c..f681d8473e 100644 --- a/sdk/python/feast/infra/online_stores/redis.py +++ b/sdk/python/feast/infra/online_stores/redis.py @@ -42,6 +42,7 @@ try: from redis import Redis + from redis import asyncio as redis_asyncio from redis.cluster import ClusterNode, RedisCluster from redis.sentinel import Sentinel except ImportError as e: @@ -90,6 +91,9 @@ class RedisOnlineStore(OnlineStore): """ _client: Optional[Union[Redis, RedisCluster]] = None + _client_async: Optional[Union[redis_asyncio.Redis, redis_asyncio.RedisCluster]] = ( + None + ) def delete_entity_values(self, config: RepoConfig, join_keys: List[str]): client = self._get_client(config.online_store) @@ -234,6 +238,30 @@ def _get_client(self, online_store_config: RedisOnlineStoreConfig): self._client = Redis(**kwargs) return self._client + async def _get_client_async(self, online_store_config: RedisOnlineStoreConfig): + if not self._client_async: + startup_nodes, kwargs = self._parse_connection_string( + online_store_config.connection_string + ) + if online_store_config.redis_type == RedisType.redis_cluster: + kwargs["startup_nodes"] = [ + redis_asyncio.cluster.ClusterNode(**node) for node in startup_nodes + ] + self._client_async = redis_asyncio.RedisCluster(**kwargs) + elif online_store_config.redis_type == RedisType.redis_sentinel: + sentinel_hosts = [] + for item in startup_nodes: + sentinel_hosts.append((item["host"], int(item["port"]))) + + sentinel = redis_asyncio.Sentinel(sentinel_hosts, **kwargs) + master = sentinel.master_for(online_store_config.sentinel_master) + self._client_async = master + else: + kwargs["host"] = startup_nodes[0]["host"] + kwargs["port"] = startup_nodes[0]["port"] + self._client_async = redis_asyncio.Redis(**kwargs) + return self._client_async + @log_exceptions_and_usage(online_store="redis") def online_write_batch( self, @@ -304,6 +332,49 @@ def online_write_batch( if progress: progress(len(results)) + def _generate_redis_keys_for_entities( + self, config: RepoConfig, entity_keys: List[EntityKeyProto] + ) -> List[bytes]: + keys = [] + for entity_key in entity_keys: + redis_key_bin = _redis_key( + config.project, + entity_key, + entity_key_serialization_version=config.entity_key_serialization_version, + ) + keys.append(redis_key_bin) + return keys + + def _generate_hset_keys_for_features( + self, + feature_view: FeatureView, + requested_features: Optional[List[str]] = None, + ) -> Tuple[List[str], List[str]]: + if not requested_features: + requested_features = [f.name for f in feature_view.features] + + hset_keys = [_mmh3(f"{feature_view.name}:{k}") for k in requested_features] + + ts_key = f"_ts:{feature_view.name}" + hset_keys.append(ts_key) + requested_features.append(ts_key) + + return requested_features, hset_keys + + def _convert_redis_values_to_protobuf( + self, + redis_values: List[List[ByteString]], + feature_view: str, + requested_features: List[str], + ): + result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = [] + for values in redis_values: + features = self._get_features_for_entity( + values, feature_view, requested_features + ) + result.append(features) + return result + @log_exceptions_and_usage(online_store="redis") def online_read( self, @@ -316,39 +387,51 @@ def online_read( assert isinstance(online_store_config, RedisOnlineStoreConfig) client = self._get_client(online_store_config) - feature_view = table.name - 
project = config.project + feature_view = table - result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = [] + requested_features, hset_keys = self._generate_hset_keys_for_features( + feature_view, requested_features + ) + keys = self._generate_redis_keys_for_entities(config, entity_keys) - if not requested_features: - requested_features = [f.name for f in table.features] + with client.pipeline(transaction=False) as pipe: + for redis_key_bin in keys: + pipe.hmget(redis_key_bin, hset_keys) + with tracing_span(name="remote_call"): + redis_values = pipe.execute() - hset_keys = [_mmh3(f"{feature_view}:{k}") for k in requested_features] + return self._convert_redis_values_to_protobuf( + redis_values, feature_view.name, requested_features + ) - ts_key = f"_ts:{feature_view}" - hset_keys.append(ts_key) - requested_features.append(ts_key) + @log_exceptions_and_usage(online_store="redis") + async def online_read_async( + self, + config: RepoConfig, + table: FeatureView, + entity_keys: List[EntityKeyProto], + requested_features: Optional[List[str]] = None, + ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: + online_store_config = config.online_store + assert isinstance(online_store_config, RedisOnlineStoreConfig) - keys = [] - for entity_key in entity_keys: - redis_key_bin = _redis_key( - project, - entity_key, - entity_key_serialization_version=config.entity_key_serialization_version, - ) - keys.append(redis_key_bin) - with client.pipeline(transaction=False) as pipe: + client = await self._get_client_async(online_store_config) + feature_view = table + + requested_features, hset_keys = self._generate_hset_keys_for_features( + feature_view, requested_features + ) + keys = self._generate_redis_keys_for_entities(config, entity_keys) + + async with client.pipeline(transaction=False) as pipe: for redis_key_bin in keys: pipe.hmget(redis_key_bin, hset_keys) with tracing_span(name="remote_call"): - redis_values = pipe.execute() - for values in redis_values: - features = self._get_features_for_entity( - values, feature_view, requested_features - ) - result.append(features) - return result + redis_values = await pipe.execute() + + return self._convert_redis_values_to_protobuf( + redis_values, feature_view.name, requested_features + ) def _get_features_for_entity( self, diff --git a/sdk/python/feast/infra/passthrough_provider.py b/sdk/python/feast/infra/passthrough_provider.py index 6476acbcb9..48d2f8ef18 100644 --- a/sdk/python/feast/infra/passthrough_provider.py +++ b/sdk/python/feast/infra/passthrough_provider.py @@ -188,6 +188,22 @@ def online_read( ) return result + @log_exceptions_and_usage(sampler=RatioSampler(ratio=0.001)) + async def online_read_async( + self, + config: RepoConfig, + table: FeatureView, + entity_keys: List[EntityKeyProto], + requested_features: Optional[List[str]] = None, + ) -> List: + set_usage_attribute("provider", self.__class__.__name__) + result = [] + if self.online_store: + result = await self.online_store.online_read_async( + config, table, entity_keys, requested_features + ) + return result + @log_exceptions_and_usage(sampler=RatioSampler(ratio=0.001)) def retrieve_online_documents( self, @@ -196,12 +212,18 @@ def retrieve_online_documents( requested_feature: str, query: List[float], top_k: int, + distance_metric: Optional[str] = None, ) -> List: set_usage_attribute("provider", self.__class__.__name__) result = [] if self.online_store: result = self.online_store.retrieve_online_documents( - config, table, requested_feature, query, 
top_k + config, + table, + requested_feature, + query, + top_k, + distance_metric, ) return result diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index a45051a1b6..22f6088474 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -229,6 +229,30 @@ def online_read( """ pass + @abstractmethod + async def online_read_async( + self, + config: RepoConfig, + table: FeatureView, + entity_keys: List[EntityKeyProto], + requested_features: Optional[List[str]] = None, + ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: + """ + Reads features values for the given entity keys asynchronously. + + Args: + config: The config for the current feature store. + table: The feature view whose feature values should be read. + entity_keys: The list of entity keys for which feature values should be read. + requested_features: The list of features that should be read. + + Returns: + A list of the same length as entity_keys. Each item in the list is a tuple where the first + item is the event timestamp for the row, and the second item is a dict mapping feature names + to values, which are returned in proto format. + """ + pass + @abstractmethod def retrieve_saved_dataset( self, config: RepoConfig, dataset: SavedDataset @@ -303,6 +327,7 @@ def retrieve_online_documents( requested_feature: str, query: List[float], top_k: int, + distance_metric: Optional[str] = None, ) -> List[ Tuple[ Optional[datetime], @@ -312,14 +337,15 @@ def retrieve_online_documents( ] ]: """ - Searches for the top-k nearest neighbors of the given document in the online document store. + Searches for the top-k most similar documents in the online document store. Args: + distance_metric: distance metric to use for the search. config: The config for the current feature store. table: The feature view whose embeddings should be searched. requested_feature: the requested document feature name. query: The query embedding to search for. - top_k: The number of nearest neighbors to return. + top_k: The number of documents to return. Returns: A list of dictionaries, where each dictionary contains the document feature. diff --git a/sdk/python/feast/online_response.py b/sdk/python/feast/online_response.py index 050b374340..a4e5694127 100644 --- a/sdk/python/feast/online_response.py +++ b/sdk/python/feast/online_response.py @@ -50,7 +50,7 @@ def to_dict(self, include_event_timestamps: bool = False) -> Dict[str, Any]: Converts GetOnlineFeaturesResponse features into a dictionary form. Args: - is_with_event_timestamps: bool Optionally include feature timestamps in the dictionary + include_event_timestamps: bool Optionally include feature timestamps in the dictionary """ response: Dict[str, List[Any]] = {} @@ -74,7 +74,7 @@ def to_df(self, include_event_timestamps: bool = False) -> pd.DataFrame: Converts GetOnlineFeaturesResponse features into Panda dataframe form. Args: - is_with_event_timestamps: bool Optionally include feature timestamps in the dataframe + include_event_timestamps: bool Optionally include feature timestamps in the dataframe """ return pd.DataFrame(self.to_dict(include_event_timestamps)) @@ -84,7 +84,7 @@ def to_arrow(self, include_event_timestamps: bool = False) -> pa.Table: Converts GetOnlineFeaturesResponse features into pyarrow Table. 
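To make the new asynchronous read path above concrete, the following is a hedged usage sketch, not part of the diff. It assumes a local feature repo whose online store is Redis (the only store given `online_read_async` in this change); the repo path, feature view name, join key, entity value and feature names are all placeholders, and `_get_provider()` is an internal SDK accessor used purely for illustration.

```python
# Illustrative sketch only: call the provider-level online_read_async
# added in this diff. Assumes a Redis-backed feature repo; all names
# below (feature view, join key, features) are placeholders.
import asyncio

from feast import FeatureStore
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
from feast.protos.feast.types.Value_pb2 import Value as ValueProto


async def main() -> None:
    store = FeatureStore(repo_path=".")
    provider = store._get_provider()  # internal accessor, for illustration only
    feature_view = store.get_feature_view("driver_hourly_stats")

    entity_key = EntityKeyProto(
        join_keys=["driver_id"],
        entity_values=[ValueProto(int64_val=1001)],
    )

    # Same contract as the synchronous online_read: one
    # (event_timestamp, {feature_name: ValueProto}) tuple per entity key.
    rows = await provider.online_read_async(
        config=store.config,
        table=feature_view,
        entity_keys=[entity_key],
        requested_features=["conv_rate", "acc_rate"],
    )
    for event_ts, features in rows:
        print(event_ts, features)


asyncio.run(main())
```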
Args: - is_with_event_timestamps: bool Optionally include feature timestamps in the table + include_event_timestamps: bool Optionally include feature timestamps in the table """ return pa.Table.from_pydict(self.to_dict(include_event_timestamps)) diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index 9311dfdef0..00e8413f23 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -64,6 +64,7 @@ "rockset": "feast.infra.online_stores.contrib.rockset_online_store.rockset.RocksetOnlineStore", "hazelcast": "feast.infra.online_stores.contrib.hazelcast_online_store.hazelcast_online_store.HazelcastOnlineStore", "ikv": "feast.infra.online_stores.contrib.ikv_online_store.ikv.IKVOnlineStore", + "elasticsearch": "feast.infra.online_stores.contrib.elasticsearch.ElasticSearchOnlineStore", } OFFLINE_STORE_CLASS_FOR_TYPE = { diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 452b52c73a..e7ca9ca35b 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -1,9 +1,5 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --extra=ci --output-file=sdk/python/requirements/py3.10-ci-requirements.txt -# +# This file was autogenerated by uv via the following command: +# uv pip compile --system --no-strip-extras setup.py --extra ci --output-file sdk/python/requirements/py3.10-ci-requirements.txt alabaster==0.7.16 # via sphinx altair==4.2.2 @@ -25,7 +21,6 @@ arrow==1.3.0 asn1crypto==1.5.1 # via snowflake-connector-python assertpy==1.1 - # via feast (setup.py) asttokens==2.4.1 # via stack-data async-lru==2.0.4 @@ -43,10 +38,8 @@ azure-core==1.30.1 # azure-identity # azure-storage-blob azure-identity==1.16.0 - # via feast (setup.py) azure-storage-blob==12.19.1 - # via feast (setup.py) -babel==2.14.0 +babel==2.15.0 # via # jupyterlab-server # sphinx @@ -56,25 +49,20 @@ bidict==0.23.1 # via ibis-framework bleach==6.1.0 # via nbconvert -boto3==1.34.88 - # via - # feast (setup.py) - # moto -botocore==1.34.88 +boto3==1.34.99 + # via moto +botocore==1.34.99 # via # boto3 # moto # s3transfer build==1.2.1 - # via - # feast (setup.py) - # pip-tools + # via pip-tools cachecontrol==0.14.0 # via firebase-admin cachetools==5.3.3 # via google-auth cassandra-driver==3.29.1 - # via feast (setup.py) certifi==2024.2.2 # via # httpcore @@ -97,28 +85,25 @@ charset-normalizer==3.3.2 click==8.1.7 # via # dask - # feast (setup.py) # geomet # great-expectations # pip-tools + # typer # uvicorn cloudpickle==3.0.0 # via dask colorama==0.4.6 - # via - # feast (setup.py) - # great-expectations + # via great-expectations comm==0.2.2 # via # ipykernel # ipywidgets -coverage[toml]==7.4.4 +coverage[toml]==7.5.1 # via pytest-cov -cryptography==42.0.5 +cryptography==42.0.7 # via # azure-identity # azure-storage-blob - # feast (setup.py) # great-expectations # moto # msal @@ -127,11 +112,9 @@ cryptography==42.0.5 # snowflake-connector-python # types-pyopenssl # types-redis -dask[array,dataframe]==2024.4.2 - # via - # dask-expr - # feast (setup.py) -dask-expr==1.0.12 +dask[dataframe]==2024.5.0 + # via dask-expr +dask-expr==1.1.0 # via dask db-dtypes==1.2.0 # via google-cloud-bigquery @@ -141,24 +124,24 @@ decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -deltalake==0.16.4 - # via feast (setup.py) +deltalake==0.17.3 dill==0.3.8 - # via feast (setup.py) distlib==0.3.8 # via virtualenv 
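Similarly, a hedged sketch of the provider-level `retrieve_online_documents` call with the new optional `distance_metric` argument shown earlier in this diff. The feature view, feature name, query embedding and `"cosine"` metric are assumptions for illustration; which metrics are supported depends on the configured online store (for example the newly registered `elasticsearch` contrib store).

```python
# Illustrative sketch only: provider-level document retrieval with the
# new optional distance_metric argument. Feature view, feature name,
# query vector and metric name are placeholders.
from feast import FeatureStore

store = FeatureStore(repo_path=".")
provider = store._get_provider()  # internal accessor, for illustration only
embeddings_view = store.get_feature_view("document_embeddings")

query_vector = [0.1, 0.2, 0.3, 0.4]  # placeholder query embedding

rows = provider.retrieve_online_documents(
    config=store.config,
    table=embeddings_view,
    requested_feature="embedding",
    query=query_vector,
    top_k=5,
    distance_metric="cosine",  # assumed value; valid metrics depend on the online store
)
for row in rows:
    print(row)  # one tuple per retrieved document
```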
+dnspython==2.6.1 + # via email-validator docker==7.0.0 - # via - # feast (setup.py) - # testcontainers + # via testcontainers docutils==0.19 # via sphinx duckdb==0.10.2 # via # duckdb-engine # ibis-framework -duckdb-engine==0.11.5 +duckdb-engine==0.12.0 # via ibis-framework +email-validator==2.1.1 + # via fastapi entrypoints==0.4 # via altair exceptiongroup==1.2.1 @@ -170,29 +153,27 @@ execnet==2.1.1 # via pytest-xdist executing==2.0.1 # via stack-data -fastapi==0.110.2 - # via feast (setup.py) +fastapi==0.111.0 + # via fastapi-cli +fastapi-cli==0.0.2 + # via fastapi fastjsonschema==2.19.1 # via nbformat -filelock==3.13.4 +filelock==3.14.0 # via # snowflake-connector-python # virtualenv firebase-admin==5.4.0 - # via feast (setup.py) fqdn==1.5.1 # via jsonschema fsspec==2023.12.2 - # via - # dask - # feast (setup.py) + # via dask geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.18.0 +google-api-core[grpc]==2.19.0 # via - # feast (setup.py) # firebase-admin # google-api-python-client # google-cloud-bigquery @@ -202,13 +183,14 @@ google-api-core[grpc]==2.18.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.126.0 +google-api-python-client==2.128.0 # via firebase-admin google-auth==2.29.0 # via # google-api-core # google-api-python-client # google-auth-httplib2 + # google-cloud-bigquery-storage # google-cloud-core # google-cloud-firestore # google-cloud-storage @@ -216,11 +198,8 @@ google-auth==2.29.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via feast (setup.py) -google-cloud-bigquery-storage==2.24.0 - # via feast (setup.py) +google-cloud-bigquery-storage==2.25.0 google-cloud-bigtable==2.23.1 - # via feast (setup.py) google-cloud-core==2.4.1 # via # google-cloud-bigquery @@ -229,13 +208,10 @@ google-cloud-core==2.4.1 # google-cloud-firestore # google-cloud-storage google-cloud-datastore==2.19.0 - # via feast (setup.py) google-cloud-firestore==2.16.0 # via firebase-admin google-cloud-storage==2.16.0 - # via - # feast (setup.py) - # firebase-admin + # via firebase-admin google-crc32c==1.5.0 # via # google-cloud-storage @@ -246,19 +222,16 @@ google-resumable-media==2.7.0 # google-cloud-storage googleapis-common-protos[grpc]==1.63.0 # via - # feast (setup.py) # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.12 - # via feast (setup.py) +great-expectations==0.18.13 greenlet==3.0.3 # via sqlalchemy grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.62.2 +grpcio==1.63.0 # via - # feast (setup.py) # google-api-core # google-cloud-bigquery # googleapis-common-protos @@ -269,27 +242,19 @@ grpcio==1.62.2 # grpcio-testing # grpcio-tools grpcio-health-checking==1.62.2 - # via feast (setup.py) grpcio-reflection==1.62.2 - # via feast (setup.py) grpcio-status==1.62.2 # via google-api-core grpcio-testing==1.62.2 - # via feast (setup.py) grpcio-tools==1.62.2 - # via feast (setup.py) -gunicorn==22.0.0 ; platform_system != "Windows" - # via feast (setup.py) +gunicorn==22.0.0 h11==0.14.0 # via # httpcore # uvicorn happybase==1.2.0 - # via feast (setup.py) hazelcast-python-client==5.3.0 - # via feast (setup.py) hiredis==2.3.2 - # via feast (setup.py) httpcore==1.0.5 # via httpx httplib2==0.22.0 @@ -300,19 +265,17 @@ httptools==0.6.1 # via uvicorn httpx==0.27.0 # via - # feast (setup.py) + # fastapi # jupyterlab ibis-framework[duckdb]==8.0.0 - # via - # feast (setup.py) - # ibis-substrait + # via ibis-substrait 
ibis-substrait==3.2.0 - # via feast (setup.py) -identify==2.5.35 +identify==2.5.36 # via pre-commit idna==3.7 # via # anyio + # email-validator # httpx # jsonschema # requests @@ -325,7 +288,7 @@ iniconfig==2.0.0 # via pytest ipykernel==6.29.4 # via jupyterlab -ipython==8.23.0 +ipython==8.24.0 # via # great-expectations # ipykernel @@ -338,10 +301,10 @@ isoduration==20.11.0 # via jsonschema jedi==0.19.1 # via ipython -jinja2==3.1.3 +jinja2==3.1.4 # via # altair - # feast (setup.py) + # fastapi # great-expectations # jupyter-server # jupyterlab @@ -361,10 +324,9 @@ jsonpointer==2.4 # via # jsonpatch # jsonschema -jsonschema[format-nongpl]==4.21.1 +jsonschema[format-nongpl]==4.22.0 # via # altair - # feast (setup.py) # great-expectations # jupyter-events # jupyterlab-server @@ -398,18 +360,17 @@ jupyter-server==2.14.0 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.1.6 +jupyterlab==4.1.8 # via notebook jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.26.0 +jupyterlab-server==2.27.1 # via # jupyterlab # notebook jupyterlab-widgets==3.0.10 # via ipywidgets kubernetes==20.13.0 - # via feast (setup.py) locket==1.0.0 # via partd makefun==1.15.2 @@ -421,7 +382,7 @@ markupsafe==2.1.5 # jinja2 # nbconvert # werkzeug -marshmallow==3.21.1 +marshmallow==3.21.2 # via great-expectations matplotlib-inline==0.1.7 # via @@ -430,17 +391,13 @@ matplotlib-inline==0.1.7 mdurl==0.1.2 # via markdown-it-py minio==7.1.0 - # via feast (setup.py) mistune==3.0.2 # via # great-expectations # nbconvert mmh3==4.1.0 - # via feast (setup.py) mock==2.0.0 - # via feast (setup.py) moto==4.2.14 - # via feast (setup.py) msal==1.28.0 # via # azure-identity @@ -451,17 +408,14 @@ msgpack==1.0.8 # via cachecontrol multipledispatch==1.0.0 # via ibis-framework -mypy==1.9.0 - # via - # feast (setup.py) - # sqlalchemy +mypy==1.10.0 + # via sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.3.0 - # via feast (setup.py) nbclient==0.10.0 # via nbconvert -nbconvert==7.16.3 +nbconvert==7.16.4 # via jupyter-server nbformat==5.10.4 # via @@ -484,7 +438,6 @@ numpy==1.26.4 # altair # dask # db-dtypes - # feast (setup.py) # great-expectations # ibis-framework # pandas @@ -492,6 +445,8 @@ numpy==1.26.4 # scipy oauthlib==3.2.2 # via requests-oauthlib +orjson==3.10.3 + # via fastapi overrides==7.7.0 # via jupyter-server packaging==24.0 @@ -521,7 +476,6 @@ pandas==2.2.2 # dask # dask-expr # db-dtypes - # feast (setup.py) # google-cloud-bigquery # great-expectations # ibis-framework @@ -532,27 +486,27 @@ parso==0.8.4 # via jedi parsy==2.1 # via ibis-framework -partd==1.4.1 +partd==1.4.2 # via dask pbr==6.0.0 # via mock pexpect==4.9.0 # via ipython +pip==24.0 + # via pip-tools pip-tools==7.4.1 - # via feast (setup.py) platformdirs==3.11.0 # via # jupyter-core # snowflake-connector-python # virtualenv -pluggy==1.4.0 +pluggy==1.5.0 # via pytest ply==3.11 # via thriftpy2 portalocker==2.8.2 # via msal-extensions pre-commit==3.3.1 - # via feast (setup.py) prometheus-client==0.20.0 # via jupyter-server prompt-toolkit==3.0.43 @@ -567,7 +521,6 @@ proto-plus==1.23.0 # google-cloud-firestore protobuf==4.25.3 # via - # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-bigquery-storage @@ -585,11 +538,8 @@ protobuf==4.25.3 # proto-plus # substrait psutil==5.9.0 - # via - # feast (setup.py) - # ipykernel + # via ipykernel psycopg2-binary==2.9.9 - # via feast (setup.py) ptyprocess==0.7.0 # via # pexpect @@ -597,7 +547,6 @@ ptyprocess==0.7.0 pure-eval==0.2.2 # via stack-data 
py==1.11.0 - # via feast (setup.py) py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 @@ -607,7 +556,6 @@ pyarrow==15.0.2 # dask-expr # db-dtypes # deltalake - # feast (setup.py) # google-cloud-bigquery # ibis-framework # snowflake-connector-python @@ -622,19 +570,16 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pybindgen==0.22.1 - # via feast (setup.py) pycparser==2.22 # via cffi -pydantic==2.7.0 +pydantic==2.7.1 # via # fastapi - # feast (setup.py) # great-expectations -pydantic-core==2.18.1 +pydantic-core==2.18.2 # via pydantic -pygments==2.17.2 +pygments==2.18.0 # via - # feast (setup.py) # ipython # nbconvert # rich @@ -644,26 +589,21 @@ pyjwt[crypto]==2.8.0 # msal # snowflake-connector-python pymssql==2.3.0 - # via feast (setup.py) pymysql==1.1.0 - # via feast (setup.py) pyodbc==5.1.0 - # via feast (setup.py) pyopenssl==24.1.0 # via snowflake-connector-python pyparsing==3.1.2 # via # great-expectations # httplib2 -pyproject-hooks==1.0.0 +pyproject-hooks==1.1.0 # via # build # pip-tools pyspark==3.5.1 - # via feast (setup.py) pytest==7.4.4 # via - # feast (setup.py) # pytest-benchmark # pytest-cov # pytest-env @@ -673,21 +613,13 @@ pytest==7.4.4 # pytest-timeout # pytest-xdist pytest-benchmark==3.4.1 - # via feast (setup.py) pytest-cov==5.0.0 - # via feast (setup.py) pytest-env==1.1.3 - # via feast (setup.py) pytest-lazy-fixture==0.6.3 - # via feast (setup.py) pytest-mock==1.10.4 - # via feast (setup.py) pytest-ordering==0.6 - # via feast (setup.py) pytest-timeout==1.4.2 - # via feast (setup.py) -pytest-xdist==3.6.0 - # via feast (setup.py) +pytest-xdist==3.6.1 python-dateutil==2.9.0.post0 # via # arrow @@ -705,6 +637,8 @@ python-dotenv==1.0.1 # via uvicorn python-json-logger==2.0.7 # via jupyter-events +python-multipart==0.0.9 + # via fastapi pytz==2024.1 # via # great-expectations @@ -715,33 +649,29 @@ pytz==2024.1 pyyaml==6.0.1 # via # dask - # feast (setup.py) # ibis-substrait # jupyter-events # kubernetes # pre-commit # responses # uvicorn -pyzmq==26.0.2 +pyzmq==26.0.3 # via # ipykernel # jupyter-client # jupyter-server redis==4.6.0 - # via feast (setup.py) -referencing==0.34.0 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications # jupyter-events -regex==2024.4.16 - # via feast (setup.py) +regex==2024.4.28 requests==2.31.0 # via # azure-core # cachecontrol # docker - # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-storage @@ -768,10 +698,11 @@ rfc3986-validator==0.1.1 # jsonschema # jupyter-events rich==13.7.1 - # via ibis-framework -rockset==2.1.1 - # via feast (setup.py) -rpds-py==0.18.0 + # via + # ibis-framework + # typer +rockset==2.1.2 +rpds-py==0.18.1 # via # jsonschema # referencing @@ -779,14 +710,21 @@ rsa==4.9 # via google-auth ruamel-yaml==0.17.17 # via great-expectations -ruff==0.4.1 - # via feast (setup.py) +ruff==0.4.3 s3transfer==0.10.1 # via boto3 scipy==1.13.0 # via great-expectations send2trash==1.8.3 # via jupyter-server +setuptools==69.5.1 + # via + # grpcio-tools + # kubernetes + # nodeenv + # pip-tools +shellingham==1.5.4 + # via typer six==1.16.0 # via # asttokens @@ -806,14 +744,12 @@ sniffio==1.3.1 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.9.0 - # via feast (setup.py) +snowflake-connector-python[pandas]==3.10.0 sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 # via beautifulsoup4 sphinx==6.2.1 - # via feast (setup.py) sphinxcontrib-applehelp==1.0.8 # via sphinx sphinxcontrib-devhelp==1.0.6 @@ -826,12 +762,10 @@ 
sphinxcontrib-qthelp==1.0.7 # via sphinx sphinxcontrib-serializinghtml==1.1.10 # via sphinx -sqlalchemy[mypy]==2.0.29 +sqlalchemy[mypy]==2.0.30 # via # duckdb-engine - # feast (setup.py) # ibis-framework - # sqlalchemy # sqlalchemy-views sqlalchemy-views==0.3.2 # via ibis-framework @@ -841,24 +775,20 @@ stack-data==0.6.3 # via ipython starlette==0.37.2 # via fastapi -substrait==0.16.0 +substrait==0.17.0 # via ibis-substrait tabulate==0.9.0 - # via feast (setup.py) -tenacity==8.2.3 - # via feast (setup.py) +tenacity==8.3.0 terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals -testcontainers==4.3.3 - # via feast (setup.py) -thriftpy2==0.4.20 +testcontainers==4.4.0 +thriftpy2==0.5.0 # via happybase -tinycss2==1.2.1 +tinycss2==1.3.0 # via nbconvert toml==0.10.2 - # via feast (setup.py) tomli==2.0.1 # via # build @@ -866,7 +796,6 @@ tomli==2.0.1 # jupyterlab # mypy # pip-tools - # pyproject-hooks # pytest # pytest-env tomlkit==0.12.4 @@ -885,10 +814,8 @@ tornado==6.4 # jupyterlab # notebook # terminado -tqdm==4.66.2 - # via - # feast (setup.py) - # great-expectations +tqdm==4.66.4 + # via great-expectations traitlets==5.14.3 # via # comm @@ -905,37 +832,25 @@ traitlets==5.14.3 # nbconvert # nbformat trino==0.328.0 - # via feast (setup.py) typeguard==4.2.1 - # via feast (setup.py) +typer==0.12.3 + # via fastapi-cli types-cffi==1.16.0.20240331 # via types-pyopenssl types-protobuf==3.19.22 - # via - # feast (setup.py) - # mypy-protobuf -types-pymysql==1.1.0.1 - # via feast (setup.py) -types-pyopenssl==24.0.0.20240417 + # via mypy-protobuf +types-pymysql==1.1.0.20240425 +types-pyopenssl==24.1.0.20240425 # via types-redis types-python-dateutil==2.9.0.20240316 - # via - # arrow - # feast (setup.py) + # via arrow types-pytz==2024.1.0.20240417 - # via feast (setup.py) types-pyyaml==6.0.12.20240311 - # via feast (setup.py) -types-redis==4.6.0.20240417 - # via feast (setup.py) +types-redis==4.6.0.20240425 types-requests==2.30.0.0 - # via feast (setup.py) -types-setuptools==69.5.0.20240415 - # via - # feast (setup.py) - # types-cffi +types-setuptools==69.5.0.20240423 + # via types-cffi types-tabulate==0.9.0.20240106 - # via feast (setup.py) types-urllib3==1.26.25.14 # via types-requests typing-extensions==4.11.0 @@ -955,6 +870,7 @@ typing-extensions==4.11.0 # sqlalchemy # testcontainers # typeguard + # typer # uvicorn tzdata==2024.1 # via pandas @@ -962,6 +878,8 @@ tzlocal==5.2 # via # great-expectations # trino +ujson==5.9.0 + # via fastapi uri-template==1.3.0 # via jsonschema uritemplate==4.1.1 @@ -970,7 +888,6 @@ urllib3==1.26.18 # via # botocore # docker - # feast (setup.py) # great-expectations # kubernetes # minio @@ -979,13 +896,13 @@ urllib3==1.26.18 # rockset # testcontainers uvicorn[standard]==0.29.0 - # via feast (setup.py) + # via + # fastapi + # fastapi-cli uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 - # via - # feast (setup.py) - # pre-commit + # via pre-commit watchfiles==0.21.0 # via uvicorn wcwidth==0.2.13 @@ -996,13 +913,13 @@ webencodings==0.5.1 # via # bleach # tinycss2 -websocket-client==1.7.0 +websocket-client==1.8.0 # via # jupyter-server # kubernetes websockets==12.0 # via uvicorn -werkzeug==3.0.2 +werkzeug==3.0.3 # via moto wheel==0.43.0 # via pip-tools @@ -1014,7 +931,3 @@ xmltodict==0.13.0 # via moto zipp==3.18.1 # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -# pip -# setuptools diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 
9f90db249a..56a8259ab4 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -1,13 +1,10 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --output-file=sdk/python/requirements/py3.10-requirements.txt -# +# This file was autogenerated by uv via the following command: +# uv pip compile --system --no-strip-extras setup.py --output-file sdk/python/requirements/py3.10-requirements.txt annotated-types==0.6.0 # via pydantic anyio==4.3.0 # via + # httpx # starlette # watchfiles attrs==23.2.0 @@ -15,70 +12,84 @@ attrs==23.2.0 # jsonschema # referencing certifi==2024.2.2 - # via requests + # via + # httpcore + # httpx + # requests charset-normalizer==3.3.2 # via requests click==8.1.7 # via # dask - # feast (setup.py) + # typer # uvicorn cloudpickle==3.0.0 # via dask colorama==0.4.6 - # via feast (setup.py) -dask[array,dataframe]==2024.4.2 - # via - # dask-expr - # feast (setup.py) -dask-expr==1.0.12 +dask[dataframe]==2024.5.0 + # via dask-expr +dask-expr==1.1.0 # via dask dill==0.3.8 - # via feast (setup.py) +dnspython==2.6.1 + # via email-validator +email-validator==2.1.1 + # via fastapi exceptiongroup==1.2.1 # via anyio -fastapi==0.110.2 - # via feast (setup.py) +fastapi==0.111.0 + # via fastapi-cli +fastapi-cli==0.0.2 + # via fastapi fsspec==2024.3.1 # via dask greenlet==3.0.3 # via sqlalchemy -gunicorn==22.0.0 ; platform_system != "Windows" - # via feast (setup.py) +gunicorn==22.0.0 h11==0.14.0 - # via uvicorn + # via + # httpcore + # uvicorn +httpcore==1.0.5 + # via httpx httptools==0.6.1 # via uvicorn +httpx==0.27.0 + # via fastapi idna==3.7 # via # anyio + # email-validator + # httpx # requests importlib-metadata==7.1.0 # via dask -jinja2==3.1.3 - # via feast (setup.py) -jsonschema==4.21.1 - # via feast (setup.py) +jinja2==3.1.4 + # via fastapi +jsonschema==4.22.0 jsonschema-specifications==2023.12.1 # via jsonschema locket==1.0.0 # via partd +markdown-it-py==3.0.0 + # via rich markupsafe==2.1.5 # via jinja2 +mdurl==0.1.2 + # via markdown-it-py mmh3==4.1.0 - # via feast (setup.py) -mypy==1.9.0 +mypy==1.10.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.6.0 - # via feast (setup.py) numpy==1.26.4 # via # dask - # feast (setup.py) # pandas # pyarrow +orjson==3.10.3 + # via fastapi packaging==24.0 # via # dask @@ -87,73 +98,66 @@ pandas==2.2.2 # via # dask # dask-expr - # feast (setup.py) -partd==1.4.1 +partd==1.4.2 # via dask protobuf==4.25.3 - # via - # feast (setup.py) - # mypy-protobuf -pyarrow==15.0.2 - # via - # dask-expr - # feast (setup.py) -pydantic==2.7.0 - # via - # fastapi - # feast (setup.py) -pydantic-core==2.18.1 + # via mypy-protobuf +pyarrow==16.0.0 + # via dask-expr +pydantic==2.7.1 + # via fastapi +pydantic-core==2.18.2 # via pydantic -pygments==2.17.2 - # via feast (setup.py) +pygments==2.18.0 + # via rich python-dateutil==2.9.0.post0 # via pandas python-dotenv==1.0.1 # via uvicorn +python-multipart==0.0.9 + # via fastapi pytz==2024.1 # via pandas pyyaml==6.0.1 # via # dask - # feast (setup.py) # uvicorn -referencing==0.34.0 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications requests==2.31.0 - # via feast (setup.py) -rpds-py==0.18.0 +rich==13.7.1 + # via typer +rpds-py==0.18.1 # via # jsonschema # referencing +shellingham==1.5.4 + # via typer six==1.16.0 # via python-dateutil sniffio==1.3.1 - # via anyio -sqlalchemy[mypy]==2.0.29 # via - # feast (setup.py) - # sqlalchemy + # anyio + # httpx 
+sqlalchemy[mypy]==2.0.30 starlette==0.37.2 # via fastapi tabulate==0.9.0 - # via feast (setup.py) -tenacity==8.2.3 - # via feast (setup.py) +tenacity==8.3.0 toml==0.10.2 - # via feast (setup.py) tomli==2.0.1 # via mypy toolz==0.12.1 # via # dask # partd -tqdm==4.66.2 - # via feast (setup.py) +tqdm==4.66.4 typeguard==4.2.1 - # via feast (setup.py) -types-protobuf==5.26.0.20240420 +typer==0.12.3 + # via fastapi-cli +types-protobuf==5.26.0.20240422 # via mypy-protobuf typing-extensions==4.11.0 # via @@ -164,13 +168,18 @@ typing-extensions==4.11.0 # pydantic-core # sqlalchemy # typeguard + # typer # uvicorn tzdata==2024.1 # via pandas +ujson==5.9.0 + # via fastapi urllib3==2.2.1 # via requests uvicorn[standard]==0.29.0 - # via feast (setup.py) + # via + # fastapi + # fastapi-cli uvloop==0.19.0 # via uvicorn watchfiles==0.21.0 @@ -178,4 +187,4 @@ watchfiles==0.21.0 websockets==12.0 # via uvicorn zipp==3.18.1 - # via importlib-metadata + # via importlib-metadata \ No newline at end of file diff --git a/sdk/python/requirements/py3.11-ci-requirements.txt b/sdk/python/requirements/py3.11-ci-requirements.txt index 71f61964be..3b76237f59 100644 --- a/sdk/python/requirements/py3.11-ci-requirements.txt +++ b/sdk/python/requirements/py3.11-ci-requirements.txt @@ -1,9 +1,5 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --extra=ci --output-file=sdk/python/requirements/py3.11-ci-requirements.txt -# +# This file was autogenerated by uv via the following command: +# uv pip compile --system --no-strip-extras setup.py --extra ci --output-file sdk/python/requirements/py3.11-ci-requirements.txt alabaster==0.7.16 # via sphinx altair==4.2.2 @@ -16,10 +12,6 @@ anyio==4.3.0 # jupyter-server # starlette # watchfiles -appdirs==1.4.4 - # via fissix -appnope==0.1.4 - # via ipykernel argon2-cffi==23.1.0 # via jupyter-server argon2-cffi-bindings==21.2.0 @@ -29,18 +21,14 @@ arrow==1.3.0 asn1crypto==1.5.1 # via snowflake-connector-python assertpy==1.1 - # via feast (setup.py) asttokens==2.4.1 # via stack-data async-lru==2.0.4 # via jupyterlab -async-timeout==4.0.3 - # via redis atpublic==4.1.0 # via ibis-framework attrs==23.2.0 # via - # bowler # jsonschema # referencing azure-core==1.30.1 @@ -48,10 +36,8 @@ azure-core==1.30.1 # azure-identity # azure-storage-blob azure-identity==1.16.0 - # via feast (setup.py) azure-storage-blob==12.19.1 - # via feast (setup.py) -babel==2.14.0 +babel==2.15.0 # via # jupyterlab-server # sphinx @@ -61,27 +47,20 @@ bidict==0.23.1 # via ibis-framework bleach==6.1.0 # via nbconvert -boto3==1.34.93 - # via - # feast (setup.py) - # moto -botocore==1.34.93 +boto3==1.34.99 + # via moto +botocore==1.34.99 # via # boto3 # moto # s3transfer -bowler==0.9.0 - # via feast (setup.py) build==1.2.1 - # via - # feast (setup.py) - # pip-tools + # via pip-tools cachecontrol==0.14.0 # via firebase-admin cachetools==5.3.3 # via google-auth cassandra-driver==3.29.1 - # via feast (setup.py) certifi==2024.2.2 # via # httpcore @@ -103,31 +82,26 @@ charset-normalizer==3.3.2 # snowflake-connector-python click==8.1.7 # via - # bowler # dask - # feast (setup.py) # geomet # great-expectations - # moreorless # pip-tools + # typer # uvicorn cloudpickle==3.0.0 # via dask colorama==0.4.6 - # via - # feast (setup.py) - # great-expectations + # via great-expectations comm==0.2.2 # via # ipykernel # ipywidgets -coverage[toml]==7.5.0 +coverage[toml]==7.5.1 # via pytest-cov -cryptography==42.0.5 +cryptography==42.0.7 # via # azure-identity # azure-storage-blob - 
# feast (setup.py) # great-expectations # moto # msal @@ -136,11 +110,9 @@ cryptography==42.0.5 # snowflake-connector-python # types-pyopenssl # types-redis -dask[array,dataframe]==2024.4.2 - # via - # dask-expr - # feast (setup.py) -dask-expr==1.0.13 +dask[dataframe]==2024.5.0 + # via dask-expr +dask-expr==1.1.0 # via dask db-dtypes==1.2.0 # via google-cloud-bigquery @@ -150,16 +122,14 @@ decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -deltalake==0.17.2 - # via feast (setup.py) +deltalake==0.17.3 dill==0.3.8 - # via feast (setup.py) distlib==0.3.8 # via virtualenv +dnspython==2.6.1 + # via email-validator docker==7.0.0 - # via - # feast (setup.py) - # testcontainers + # via testcontainers docutils==0.19 # via sphinx duckdb==0.10.2 @@ -168,19 +138,18 @@ duckdb==0.10.2 # ibis-framework duckdb-engine==0.12.0 # via ibis-framework +email-validator==2.1.1 + # via fastapi entrypoints==0.4 # via altair -exceptiongroup==1.2.1 - # via - # anyio - # ipython - # pytest execnet==2.1.1 # via pytest-xdist executing==2.0.1 # via stack-data -fastapi==0.110.2 - # via feast (setup.py) +fastapi==0.111.0 + # via fastapi-cli +fastapi-cli==0.0.2 + # via fastapi fastjsonschema==2.19.1 # via nbformat filelock==3.14.0 @@ -188,22 +157,16 @@ filelock==3.14.0 # snowflake-connector-python # virtualenv firebase-admin==5.4.0 - # via feast (setup.py) -fissix==24.4.24 - # via bowler fqdn==1.5.1 # via jsonschema fsspec==2023.12.2 - # via - # dask - # feast (setup.py) + # via dask geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.18.0 +google-api-core[grpc]==2.19.0 # via - # feast (setup.py) # firebase-admin # google-api-python-client # google-cloud-bigquery @@ -213,13 +176,14 @@ google-api-core[grpc]==2.18.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.127.0 +google-api-python-client==2.128.0 # via firebase-admin google-auth==2.29.0 # via # google-api-core # google-api-python-client # google-auth-httplib2 + # google-cloud-bigquery-storage # google-cloud-core # google-cloud-firestore # google-cloud-storage @@ -227,11 +191,8 @@ google-auth==2.29.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via feast (setup.py) -google-cloud-bigquery-storage==2.24.0 - # via feast (setup.py) +google-cloud-bigquery-storage==2.25.0 google-cloud-bigtable==2.23.1 - # via feast (setup.py) google-cloud-core==2.4.1 # via # google-cloud-bigquery @@ -240,13 +201,10 @@ google-cloud-core==2.4.1 # google-cloud-firestore # google-cloud-storage google-cloud-datastore==2.19.0 - # via feast (setup.py) google-cloud-firestore==2.16.0 # via firebase-admin google-cloud-storage==2.16.0 - # via - # feast (setup.py) - # firebase-admin + # via firebase-admin google-crc32c==1.5.0 # via # google-cloud-storage @@ -257,19 +215,16 @@ google-resumable-media==2.7.0 # google-cloud-storage googleapis-common-protos[grpc]==1.63.0 # via - # feast (setup.py) # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.12 - # via feast (setup.py) +great-expectations==0.18.13 greenlet==3.0.3 # via sqlalchemy grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.62.2 +grpcio==1.63.0 # via - # feast (setup.py) # google-api-core # google-cloud-bigquery # googleapis-common-protos @@ -280,27 +235,19 @@ grpcio==1.62.2 # grpcio-testing # grpcio-tools grpcio-health-checking==1.62.2 - # via feast (setup.py) grpcio-reflection==1.62.2 - # via feast (setup.py) grpcio-status==1.62.2 # 
via google-api-core grpcio-testing==1.62.2 - # via feast (setup.py) grpcio-tools==1.62.2 - # via feast (setup.py) -gunicorn==22.0.0 ; platform_system != "Windows" - # via feast (setup.py) +gunicorn==22.0.0 h11==0.14.0 # via # httpcore # uvicorn happybase==1.2.0 - # via feast (setup.py) hazelcast-python-client==5.3.0 - # via feast (setup.py) hiredis==2.3.2 - # via feast (setup.py) httpcore==1.0.5 # via httpx httplib2==0.22.0 @@ -311,19 +258,17 @@ httptools==0.6.1 # via uvicorn httpx==0.27.0 # via - # feast (setup.py) + # fastapi # jupyterlab ibis-framework[duckdb]==8.0.0 - # via - # feast (setup.py) - # ibis-substrait + # via ibis-substrait ibis-substrait==3.2.0 - # via feast (setup.py) identify==2.5.36 # via pre-commit idna==3.7 # via # anyio + # email-validator # httpx # jsonschema # requests @@ -331,21 +276,12 @@ idna==3.7 imagesize==1.4.1 # via sphinx importlib-metadata==7.1.0 - # via - # build - # dask - # jupyter-client - # jupyter-lsp - # jupyterlab - # jupyterlab-server - # nbconvert - # sphinx - # typeguard + # via dask iniconfig==2.0.0 # via pytest ipykernel==6.29.4 # via jupyterlab -ipython==8.18.1 +ipython==8.24.0 # via # great-expectations # ipykernel @@ -358,10 +294,10 @@ isoduration==20.11.0 # via jsonschema jedi==0.19.1 # via ipython -jinja2==3.1.3 +jinja2==3.1.4 # via # altair - # feast (setup.py) + # fastapi # great-expectations # jupyter-server # jupyterlab @@ -381,10 +317,9 @@ jsonpointer==2.4 # via # jsonpatch # jsonschema -jsonschema[format-nongpl]==4.21.1 +jsonschema[format-nongpl]==4.22.0 # via # altair - # feast (setup.py) # great-expectations # jupyter-events # jupyterlab-server @@ -429,7 +364,6 @@ jupyterlab-server==2.27.1 jupyterlab-widgets==3.0.10 # via ipywidgets kubernetes==20.13.0 - # via feast (setup.py) locket==1.0.0 # via partd makefun==1.15.2 @@ -441,7 +375,7 @@ markupsafe==2.1.5 # jinja2 # nbconvert # werkzeug -marshmallow==3.21.1 +marshmallow==3.21.2 # via great-expectations matplotlib-inline==0.1.7 # via @@ -450,19 +384,13 @@ matplotlib-inline==0.1.7 mdurl==0.1.2 # via markdown-it-py minio==7.1.0 - # via feast (setup.py) mistune==3.0.2 # via # great-expectations # nbconvert mmh3==4.1.0 - # via feast (setup.py) mock==2.0.0 - # via feast (setup.py) -moreorless==0.4.0 - # via bowler moto==4.2.14 - # via feast (setup.py) msal==1.28.0 # via # azure-identity @@ -474,13 +402,10 @@ msgpack==1.0.8 multipledispatch==1.0.0 # via ibis-framework mypy==1.10.0 - # via - # feast (setup.py) - # sqlalchemy + # via sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.3.0 - # via feast (setup.py) nbclient==0.10.0 # via nbconvert nbconvert==7.16.4 @@ -506,7 +431,6 @@ numpy==1.26.4 # altair # dask # db-dtypes - # feast (setup.py) # great-expectations # ibis-framework # pandas @@ -514,6 +438,8 @@ numpy==1.26.4 # scipy oauthlib==3.2.2 # via requests-oauthlib +orjson==3.10.3 + # via fastapi overrides==7.7.0 # via jupyter-server packaging==24.0 @@ -543,7 +469,6 @@ pandas==2.2.2 # dask # dask-expr # db-dtypes - # feast (setup.py) # google-cloud-bigquery # great-expectations # ibis-framework @@ -554,14 +479,15 @@ parso==0.8.4 # via jedi parsy==2.1 # via ibis-framework -partd==1.4.1 +partd==1.4.2 # via dask pbr==6.0.0 # via mock pexpect==4.9.0 # via ipython +pip==24.0 + # via pip-tools pip-tools==7.4.1 - # via feast (setup.py) platformdirs==3.11.0 # via # jupyter-core @@ -574,7 +500,6 @@ ply==3.11 portalocker==2.8.2 # via msal-extensions pre-commit==3.3.1 - # via feast (setup.py) prometheus-client==0.20.0 # via jupyter-server prompt-toolkit==3.0.43 @@ -589,7 +514,6 @@ 
proto-plus==1.23.0 # google-cloud-firestore protobuf==4.25.3 # via - # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-bigquery-storage @@ -607,11 +531,8 @@ protobuf==4.25.3 # proto-plus # substrait psutil==5.9.0 - # via - # feast (setup.py) - # ipykernel + # via ipykernel psycopg2-binary==2.9.9 - # via feast (setup.py) ptyprocess==0.7.0 # via # pexpect @@ -619,7 +540,6 @@ ptyprocess==0.7.0 pure-eval==0.2.2 # via stack-data py==1.11.0 - # via feast (setup.py) py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 @@ -629,7 +549,6 @@ pyarrow==15.0.2 # dask-expr # db-dtypes # deltalake - # feast (setup.py) # google-cloud-bigquery # ibis-framework # snowflake-connector-python @@ -644,19 +563,16 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pybindgen==0.22.1 - # via feast (setup.py) pycparser==2.22 # via cffi pydantic==2.7.1 # via # fastapi - # feast (setup.py) # great-expectations pydantic-core==2.18.2 # via pydantic -pygments==2.17.2 +pygments==2.18.0 # via - # feast (setup.py) # ipython # nbconvert # rich @@ -666,11 +582,8 @@ pyjwt[crypto]==2.8.0 # msal # snowflake-connector-python pymssql==2.3.0 - # via feast (setup.py) pymysql==1.1.0 - # via feast (setup.py) pyodbc==5.1.0 - # via feast (setup.py) pyopenssl==24.1.0 # via snowflake-connector-python pyparsing==3.1.2 @@ -682,10 +595,8 @@ pyproject-hooks==1.1.0 # build # pip-tools pyspark==3.5.1 - # via feast (setup.py) pytest==7.4.4 # via - # feast (setup.py) # pytest-benchmark # pytest-cov # pytest-env @@ -695,21 +606,13 @@ pytest==7.4.4 # pytest-timeout # pytest-xdist pytest-benchmark==3.4.1 - # via feast (setup.py) pytest-cov==5.0.0 - # via feast (setup.py) pytest-env==1.1.3 - # via feast (setup.py) pytest-lazy-fixture==0.6.3 - # via feast (setup.py) pytest-mock==1.10.4 - # via feast (setup.py) pytest-ordering==0.6 - # via feast (setup.py) pytest-timeout==1.4.2 - # via feast (setup.py) pytest-xdist==3.6.1 - # via feast (setup.py) python-dateutil==2.9.0.post0 # via # arrow @@ -727,6 +630,8 @@ python-dotenv==1.0.1 # via uvicorn python-json-logger==2.0.7 # via jupyter-events +python-multipart==0.0.9 + # via fastapi pytz==2024.1 # via # great-expectations @@ -737,33 +642,29 @@ pytz==2024.1 pyyaml==6.0.1 # via # dask - # feast (setup.py) # ibis-substrait # jupyter-events # kubernetes # pre-commit # responses # uvicorn -pyzmq==26.0.2 +pyzmq==26.0.3 # via # ipykernel # jupyter-client # jupyter-server redis==4.6.0 - # via feast (setup.py) -referencing==0.35.0 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications # jupyter-events regex==2024.4.28 - # via feast (setup.py) requests==2.31.0 # via # azure-core # cachecontrol # docker - # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-storage @@ -790,10 +691,11 @@ rfc3986-validator==0.1.1 # jsonschema # jupyter-events rich==13.7.1 - # via ibis-framework -rockset==2.1.1 - # via feast (setup.py) -rpds-py==0.18.0 + # via + # ibis-framework + # typer +rockset==2.1.2 +rpds-py==0.18.1 # via # jsonschema # referencing @@ -801,16 +703,21 @@ rsa==4.9 # via google-auth ruamel-yaml==0.17.17 # via great-expectations -ruamel-yaml-clib==0.2.8 - # via ruamel-yaml -ruff==0.4.2 - # via feast (setup.py) +ruff==0.4.3 s3transfer==0.10.1 # via boto3 scipy==1.13.0 # via great-expectations send2trash==1.8.3 # via jupyter-server +setuptools==69.5.1 + # via + # grpcio-tools + # kubernetes + # nodeenv + # pip-tools +shellingham==1.5.4 + # via typer six==1.16.0 # via # asttokens @@ -830,14 +737,12 @@ sniffio==1.3.1 # httpx snowballstemmer==2.2.0 # via 
sphinx -snowflake-connector-python[pandas]==3.9.1 - # via feast (setup.py) +snowflake-connector-python[pandas]==3.10.0 sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 # via beautifulsoup4 sphinx==6.2.1 - # via feast (setup.py) sphinxcontrib-applehelp==1.0.8 # via sphinx sphinxcontrib-devhelp==1.0.6 @@ -850,12 +755,10 @@ sphinxcontrib-qthelp==1.0.7 # via sphinx sphinxcontrib-serializinghtml==1.1.10 # via sphinx -sqlalchemy[mypy]==2.0.29 +sqlalchemy[mypy]==2.0.30 # via # duckdb-engine - # feast (setup.py) # ibis-framework - # sqlalchemy # sqlalchemy-views sqlalchemy-views==0.3.2 # via ibis-framework @@ -868,30 +771,17 @@ starlette==0.37.2 substrait==0.17.0 # via ibis-substrait tabulate==0.9.0 - # via feast (setup.py) -tenacity==8.2.3 - # via feast (setup.py) +tenacity==8.3.0 terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals testcontainers==4.4.0 - # via feast (setup.py) -thriftpy2==0.4.20 +thriftpy2==0.5.0 # via happybase tinycss2==1.3.0 # via nbconvert toml==0.10.2 - # via feast (setup.py) -tomli==2.0.1 - # via - # build - # coverage - # jupyterlab - # mypy - # pip-tools - # pytest - # pytest-env tomlkit==0.12.4 # via snowflake-connector-python toolz==0.12.1 @@ -908,10 +798,8 @@ tornado==6.4 # jupyterlab # notebook # terminado -tqdm==4.66.2 - # via - # feast (setup.py) - # great-expectations +tqdm==4.66.4 + # via great-expectations traitlets==5.14.3 # via # comm @@ -928,43 +816,29 @@ traitlets==5.14.3 # nbconvert # nbformat trino==0.328.0 - # via feast (setup.py) typeguard==4.2.1 - # via feast (setup.py) +typer==0.12.3 + # via fastapi-cli types-cffi==1.16.0.20240331 # via types-pyopenssl types-protobuf==3.19.22 - # via - # feast (setup.py) - # mypy-protobuf + # via mypy-protobuf types-pymysql==1.1.0.20240425 - # via feast (setup.py) types-pyopenssl==24.1.0.20240425 # via types-redis types-python-dateutil==2.9.0.20240316 - # via - # arrow - # feast (setup.py) + # via arrow types-pytz==2024.1.0.20240417 - # via feast (setup.py) types-pyyaml==6.0.12.20240311 - # via feast (setup.py) types-redis==4.6.0.20240425 - # via feast (setup.py) types-requests==2.30.0.0 - # via feast (setup.py) types-setuptools==69.5.0.20240423 - # via - # feast (setup.py) - # types-cffi + # via types-cffi types-tabulate==0.9.0.20240106 - # via feast (setup.py) types-urllib3==1.26.25.14 # via types-requests typing-extensions==4.11.0 # via - # anyio - # async-lru # azure-core # azure-storage-blob # fastapi @@ -976,16 +850,17 @@ typing-extensions==4.11.0 # pydantic-core # snowflake-connector-python # sqlalchemy - # starlette # testcontainers # typeguard - # uvicorn + # typer tzdata==2024.1 # via pandas tzlocal==5.2 # via # great-expectations # trino +ujson==5.9.0 + # via fastapi uri-template==1.3.0 # via jsonschema uritemplate==4.1.1 @@ -994,25 +869,21 @@ urllib3==1.26.18 # via # botocore # docker - # feast (setup.py) # great-expectations # kubernetes # minio # requests # responses # rockset - # snowflake-connector-python # testcontainers uvicorn[standard]==0.29.0 - # via feast (setup.py) + # via + # fastapi + # fastapi-cli uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 - # via - # feast (setup.py) - # pre-commit -volatile==2.1.0 - # via bowler + # via pre-commit watchfiles==0.21.0 # via uvicorn wcwidth==0.2.13 @@ -1029,7 +900,7 @@ websocket-client==1.8.0 # kubernetes websockets==12.0 # via uvicorn -werkzeug==3.0.2 +werkzeug==3.0.3 # via moto wheel==0.43.0 # via pip-tools @@ -1041,7 +912,3 @@ xmltodict==0.13.0 # via moto zipp==3.18.1 # via importlib-metadata - -# The following 
packages are considered to be unsafe in a requirements file: -# pip -# setuptools diff --git a/sdk/python/requirements/py3.11-requirements.txt b/sdk/python/requirements/py3.11-requirements.txt index 161e435b54..c34b610d14 100644 --- a/sdk/python/requirements/py3.11-requirements.txt +++ b/sdk/python/requirements/py3.11-requirements.txt @@ -1,97 +1,93 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --output-file=sdk/python/requirements/py3.11-requirements.txt -# +# This file was autogenerated by uv via the following command: +# uv pip compile --system --no-strip-extras setup.py --output-file sdk/python/requirements/py3.11-requirements.txt annotated-types==0.6.0 # via pydantic anyio==4.3.0 # via + # httpx # starlette # watchfiles -appdirs==1.4.4 - # via fissix attrs==23.2.0 # via - # bowler # jsonschema # referencing -bowler==0.9.0 - # via feast (setup.py) certifi==2024.2.2 - # via requests + # via + # httpcore + # httpx + # requests charset-normalizer==3.3.2 # via requests click==8.1.7 # via - # bowler # dask - # feast (setup.py) - # moreorless + # typer # uvicorn cloudpickle==3.0.0 # via dask colorama==0.4.6 - # via feast (setup.py) -dask[array,dataframe]==2024.4.2 - # via - # dask-expr - # feast (setup.py) -dask-expr==1.0.13 +dask[dataframe]==2024.5.0 + # via dask-expr +dask-expr==1.1.0 # via dask dill==0.3.8 - # via feast (setup.py) -exceptiongroup==1.2.1 - # via anyio -fastapi==0.110.2 - # via feast (setup.py) -fissix==24.4.24 - # via bowler +dnspython==2.6.1 + # via email-validator +email-validator==2.1.1 + # via fastapi +fastapi==0.111.0 + # via fastapi-cli +fastapi-cli==0.0.2 + # via fastapi fsspec==2024.3.1 # via dask greenlet==3.0.3 # via sqlalchemy -gunicorn==22.0.0 ; platform_system != "Windows" - # via feast (setup.py) +gunicorn==22.0.0 h11==0.14.0 - # via uvicorn + # via + # httpcore + # uvicorn +httpcore==1.0.5 + # via httpx httptools==0.6.1 # via uvicorn +httpx==0.27.0 + # via fastapi idna==3.7 # via # anyio + # email-validator + # httpx # requests importlib-metadata==7.1.0 - # via - # dask - # typeguard -jinja2==3.1.3 - # via feast (setup.py) -jsonschema==4.21.1 - # via feast (setup.py) + # via dask +jinja2==3.1.4 + # via fastapi +jsonschema==4.22.0 jsonschema-specifications==2023.12.1 # via jsonschema locket==1.0.0 # via partd +markdown-it-py==3.0.0 + # via rich markupsafe==2.1.5 # via jinja2 +mdurl==0.1.2 + # via markdown-it-py mmh3==4.1.0 - # via feast (setup.py) -moreorless==0.4.0 - # via bowler mypy==1.10.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.6.0 - # via feast (setup.py) numpy==1.26.4 # via # dask - # feast (setup.py) # pandas # pyarrow +orjson==3.10.3 + # via fastapi packaging==24.0 # via # dask @@ -100,95 +96,86 @@ pandas==2.2.2 # via # dask # dask-expr - # feast (setup.py) -partd==1.4.1 +partd==1.4.2 # via dask protobuf==4.25.3 - # via - # feast (setup.py) - # mypy-protobuf + # via mypy-protobuf pyarrow==16.0.0 - # via - # dask-expr - # feast (setup.py) + # via dask-expr pydantic==2.7.1 - # via - # fastapi - # feast (setup.py) + # via fastapi pydantic-core==2.18.2 # via pydantic -pygments==2.17.2 - # via feast (setup.py) +pygments==2.18.0 + # via rich python-dateutil==2.9.0.post0 # via pandas python-dotenv==1.0.1 # via uvicorn +python-multipart==0.0.9 + # via fastapi pytz==2024.1 # via pandas pyyaml==6.0.1 # via # dask - # feast (setup.py) # uvicorn -referencing==0.35.0 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications requests==2.31.0 - # via feast 
(setup.py) -rpds-py==0.18.0 +rich==13.7.1 + # via typer +rpds-py==0.18.1 # via # jsonschema # referencing +shellingham==1.5.4 + # via typer six==1.16.0 # via python-dateutil sniffio==1.3.1 - # via anyio -sqlalchemy[mypy]==2.0.29 # via - # feast (setup.py) - # sqlalchemy + # anyio + # httpx +sqlalchemy[mypy]==2.0.30 starlette==0.37.2 # via fastapi tabulate==0.9.0 - # via feast (setup.py) -tenacity==8.2.3 - # via feast (setup.py) +tenacity==8.3.0 toml==0.10.2 - # via feast (setup.py) -tomli==2.0.1 - # via mypy toolz==0.12.1 # via # dask # partd -tqdm==4.66.2 - # via feast (setup.py) +tqdm==4.66.4 typeguard==4.2.1 - # via feast (setup.py) +typer==0.12.3 + # via fastapi-cli types-protobuf==5.26.0.20240422 # via mypy-protobuf typing-extensions==4.11.0 # via - # anyio # fastapi # mypy # pydantic # pydantic-core # sqlalchemy - # starlette # typeguard - # uvicorn + # typer tzdata==2024.1 # via pandas +ujson==5.9.0 + # via fastapi urllib3==2.2.1 # via requests uvicorn[standard]==0.29.0 - # via feast (setup.py) + # via + # fastapi + # fastapi-cli uvloop==0.19.0 # via uvicorn -volatile==2.1.0 - # via bowler watchfiles==0.21.0 # via uvicorn websockets==12.0 diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 9486743f77..a628f0823d 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -1,9 +1,5 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --extra=ci --output-file=sdk/python/requirements/py3.9-ci-requirements.txt -# +# This file was autogenerated by uv via the following command: +# uv pip compile --system --no-strip-extras setup.py --extra ci --output-file sdk/python/requirements/py3.9-ci-requirements.txt alabaster==0.7.16 # via sphinx altair==4.2.2 @@ -25,7 +21,6 @@ arrow==1.3.0 asn1crypto==1.5.1 # via snowflake-connector-python assertpy==1.1 - # via feast (setup.py) asttokens==2.4.1 # via stack-data async-lru==2.0.4 @@ -43,10 +38,8 @@ azure-core==1.30.1 # azure-identity # azure-storage-blob azure-identity==1.16.0 - # via feast (setup.py) azure-storage-blob==12.19.1 - # via feast (setup.py) -babel==2.14.0 +babel==2.15.0 # via # jupyterlab-server # sphinx @@ -56,25 +49,20 @@ bidict==0.23.1 # via ibis-framework bleach==6.1.0 # via nbconvert -boto3==1.34.88 - # via - # feast (setup.py) - # moto -botocore==1.34.88 +boto3==1.34.99 + # via moto +botocore==1.34.99 # via # boto3 # moto # s3transfer build==1.2.1 - # via - # feast (setup.py) - # pip-tools + # via pip-tools cachecontrol==0.14.0 # via firebase-admin cachetools==5.3.3 # via google-auth cassandra-driver==3.29.1 - # via feast (setup.py) certifi==2024.2.2 # via # httpcore @@ -97,28 +85,25 @@ charset-normalizer==3.3.2 click==8.1.7 # via # dask - # feast (setup.py) # geomet # great-expectations # pip-tools + # typer # uvicorn cloudpickle==3.0.0 # via dask colorama==0.4.6 - # via - # feast (setup.py) - # great-expectations + # via great-expectations comm==0.2.2 # via # ipykernel # ipywidgets -coverage[toml]==7.4.4 +coverage[toml]==7.5.1 # via pytest-cov -cryptography==42.0.5 +cryptography==42.0.7 # via # azure-identity # azure-storage-blob - # feast (setup.py) # great-expectations # moto # msal @@ -127,11 +112,9 @@ cryptography==42.0.5 # snowflake-connector-python # types-pyopenssl # types-redis -dask[array,dataframe]==2024.4.2 - # via - # dask-expr - # feast (setup.py) -dask-expr==1.0.12 +dask[dataframe]==2024.5.0 + # via dask-expr +dask-expr==1.1.0 # 
via dask db-dtypes==1.2.0 # via google-cloud-bigquery @@ -141,24 +124,24 @@ decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -deltalake==0.16.4 - # via feast (setup.py) +deltalake==0.17.3 dill==0.3.8 - # via feast (setup.py) distlib==0.3.8 # via virtualenv +dnspython==2.6.1 + # via email-validator docker==7.0.0 - # via - # feast (setup.py) - # testcontainers + # via testcontainers docutils==0.19 # via sphinx duckdb==0.10.2 # via # duckdb-engine # ibis-framework -duckdb-engine==0.11.5 +duckdb-engine==0.12.0 # via ibis-framework +email-validator==2.1.1 + # via fastapi entrypoints==0.4 # via altair exceptiongroup==1.2.1 @@ -170,29 +153,27 @@ execnet==2.1.1 # via pytest-xdist executing==2.0.1 # via stack-data -fastapi==0.110.2 - # via feast (setup.py) +fastapi==0.111.0 + # via fastapi-cli +fastapi-cli==0.0.2 + # via fastapi fastjsonschema==2.19.1 # via nbformat -filelock==3.13.4 +filelock==3.14.0 # via # snowflake-connector-python # virtualenv firebase-admin==5.4.0 - # via feast (setup.py) fqdn==1.5.1 # via jsonschema fsspec==2023.12.2 - # via - # dask - # feast (setup.py) + # via dask geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.18.0 +google-api-core[grpc]==2.19.0 # via - # feast (setup.py) # firebase-admin # google-api-python-client # google-cloud-bigquery @@ -202,13 +183,14 @@ google-api-core[grpc]==2.18.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.126.0 +google-api-python-client==2.128.0 # via firebase-admin google-auth==2.29.0 # via # google-api-core # google-api-python-client # google-auth-httplib2 + # google-cloud-bigquery-storage # google-cloud-core # google-cloud-firestore # google-cloud-storage @@ -216,11 +198,8 @@ google-auth==2.29.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via feast (setup.py) -google-cloud-bigquery-storage==2.24.0 - # via feast (setup.py) +google-cloud-bigquery-storage==2.25.0 google-cloud-bigtable==2.23.1 - # via feast (setup.py) google-cloud-core==2.4.1 # via # google-cloud-bigquery @@ -229,13 +208,10 @@ google-cloud-core==2.4.1 # google-cloud-firestore # google-cloud-storage google-cloud-datastore==2.19.0 - # via feast (setup.py) google-cloud-firestore==2.16.0 # via firebase-admin google-cloud-storage==2.16.0 - # via - # feast (setup.py) - # firebase-admin + # via firebase-admin google-crc32c==1.5.0 # via # google-cloud-storage @@ -246,19 +222,16 @@ google-resumable-media==2.7.0 # google-cloud-storage googleapis-common-protos[grpc]==1.63.0 # via - # feast (setup.py) # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.12 - # via feast (setup.py) +great-expectations==0.18.13 greenlet==3.0.3 # via sqlalchemy grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.62.2 +grpcio==1.63.0 # via - # feast (setup.py) # google-api-core # google-cloud-bigquery # googleapis-common-protos @@ -269,27 +242,19 @@ grpcio==1.62.2 # grpcio-testing # grpcio-tools grpcio-health-checking==1.62.2 - # via feast (setup.py) grpcio-reflection==1.62.2 - # via feast (setup.py) grpcio-status==1.62.2 # via google-api-core grpcio-testing==1.62.2 - # via feast (setup.py) grpcio-tools==1.62.2 - # via feast (setup.py) -gunicorn==22.0.0 ; platform_system != "Windows" - # via feast (setup.py) +gunicorn==22.0.0 h11==0.14.0 # via # httpcore # uvicorn happybase==1.2.0 - # via feast (setup.py) hazelcast-python-client==5.3.0 - # via feast (setup.py) hiredis==2.3.2 - # via feast 
(setup.py) httpcore==1.0.5 # via httpx httplib2==0.22.0 @@ -300,19 +265,17 @@ httptools==0.6.1 # via uvicorn httpx==0.27.0 # via - # feast (setup.py) + # fastapi # jupyterlab ibis-framework[duckdb]==8.0.0 - # via - # feast (setup.py) - # ibis-substrait + # via ibis-substrait ibis-substrait==3.2.0 - # via feast (setup.py) -identify==2.5.35 +identify==2.5.36 # via pre-commit idna==3.7 # via # anyio + # email-validator # httpx # jsonschema # requests @@ -347,10 +310,10 @@ isoduration==20.11.0 # via jsonschema jedi==0.19.1 # via ipython -jinja2==3.1.3 +jinja2==3.1.4 # via # altair - # feast (setup.py) + # fastapi # great-expectations # jupyter-server # jupyterlab @@ -370,10 +333,9 @@ jsonpointer==2.4 # via # jsonpatch # jsonschema -jsonschema[format-nongpl]==4.21.1 +jsonschema[format-nongpl]==4.22.0 # via # altair - # feast (setup.py) # great-expectations # jupyter-events # jupyterlab-server @@ -407,18 +369,17 @@ jupyter-server==2.14.0 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.1.6 +jupyterlab==4.1.8 # via notebook jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.26.0 +jupyterlab-server==2.27.1 # via # jupyterlab # notebook jupyterlab-widgets==3.0.10 # via ipywidgets kubernetes==20.13.0 - # via feast (setup.py) locket==1.0.0 # via partd makefun==1.15.2 @@ -430,7 +391,7 @@ markupsafe==2.1.5 # jinja2 # nbconvert # werkzeug -marshmallow==3.21.1 +marshmallow==3.21.2 # via great-expectations matplotlib-inline==0.1.7 # via @@ -439,17 +400,13 @@ matplotlib-inline==0.1.7 mdurl==0.1.2 # via markdown-it-py minio==7.1.0 - # via feast (setup.py) mistune==3.0.2 # via # great-expectations # nbconvert mmh3==4.1.0 - # via feast (setup.py) mock==2.0.0 - # via feast (setup.py) moto==4.2.14 - # via feast (setup.py) msal==1.28.0 # via # azure-identity @@ -460,17 +417,14 @@ msgpack==1.0.8 # via cachecontrol multipledispatch==1.0.0 # via ibis-framework -mypy==1.9.0 - # via - # feast (setup.py) - # sqlalchemy +mypy==1.10.0 + # via sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.3.0 - # via feast (setup.py) nbclient==0.10.0 # via nbconvert -nbconvert==7.16.3 +nbconvert==7.16.4 # via jupyter-server nbformat==5.10.4 # via @@ -493,7 +447,6 @@ numpy==1.26.4 # altair # dask # db-dtypes - # feast (setup.py) # great-expectations # ibis-framework # pandas @@ -501,6 +454,8 @@ numpy==1.26.4 # scipy oauthlib==3.2.2 # via requests-oauthlib +orjson==3.10.3 + # via fastapi overrides==7.7.0 # via jupyter-server packaging==24.0 @@ -530,7 +485,6 @@ pandas==2.2.2 # dask # dask-expr # db-dtypes - # feast (setup.py) # google-cloud-bigquery # great-expectations # ibis-framework @@ -541,27 +495,27 @@ parso==0.8.4 # via jedi parsy==2.1 # via ibis-framework -partd==1.4.1 +partd==1.4.2 # via dask pbr==6.0.0 # via mock pexpect==4.9.0 # via ipython +pip==24.0 + # via pip-tools pip-tools==7.4.1 - # via feast (setup.py) platformdirs==3.11.0 # via # jupyter-core # snowflake-connector-python # virtualenv -pluggy==1.4.0 +pluggy==1.5.0 # via pytest ply==3.11 # via thriftpy2 portalocker==2.8.2 # via msal-extensions pre-commit==3.3.1 - # via feast (setup.py) prometheus-client==0.20.0 # via jupyter-server prompt-toolkit==3.0.43 @@ -576,7 +530,6 @@ proto-plus==1.23.0 # google-cloud-firestore protobuf==4.25.3 # via - # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-bigquery-storage @@ -594,11 +547,8 @@ protobuf==4.25.3 # proto-plus # substrait psutil==5.9.0 - # via - # feast (setup.py) - # ipykernel + # via ipykernel psycopg2-binary==2.9.9 - # via feast 
(setup.py) ptyprocess==0.7.0 # via # pexpect @@ -606,7 +556,6 @@ ptyprocess==0.7.0 pure-eval==0.2.2 # via stack-data py==1.11.0 - # via feast (setup.py) py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 @@ -616,7 +565,6 @@ pyarrow==15.0.2 # dask-expr # db-dtypes # deltalake - # feast (setup.py) # google-cloud-bigquery # ibis-framework # snowflake-connector-python @@ -631,19 +579,16 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pybindgen==0.22.1 - # via feast (setup.py) pycparser==2.22 # via cffi -pydantic==2.7.0 +pydantic==2.7.1 # via # fastapi - # feast (setup.py) # great-expectations -pydantic-core==2.18.1 +pydantic-core==2.18.2 # via pydantic -pygments==2.17.2 +pygments==2.18.0 # via - # feast (setup.py) # ipython # nbconvert # rich @@ -653,26 +598,21 @@ pyjwt[crypto]==2.8.0 # msal # snowflake-connector-python pymssql==2.3.0 - # via feast (setup.py) pymysql==1.1.0 - # via feast (setup.py) pyodbc==5.1.0 - # via feast (setup.py) pyopenssl==24.1.0 # via snowflake-connector-python pyparsing==3.1.2 # via # great-expectations # httplib2 -pyproject-hooks==1.0.0 +pyproject-hooks==1.1.0 # via # build # pip-tools pyspark==3.5.1 - # via feast (setup.py) pytest==7.4.4 # via - # feast (setup.py) # pytest-benchmark # pytest-cov # pytest-env @@ -682,21 +622,13 @@ pytest==7.4.4 # pytest-timeout # pytest-xdist pytest-benchmark==3.4.1 - # via feast (setup.py) pytest-cov==5.0.0 - # via feast (setup.py) pytest-env==1.1.3 - # via feast (setup.py) pytest-lazy-fixture==0.6.3 - # via feast (setup.py) pytest-mock==1.10.4 - # via feast (setup.py) pytest-ordering==0.6 - # via feast (setup.py) pytest-timeout==1.4.2 - # via feast (setup.py) -pytest-xdist==3.6.0 - # via feast (setup.py) +pytest-xdist==3.6.1 python-dateutil==2.9.0.post0 # via # arrow @@ -714,6 +646,8 @@ python-dotenv==1.0.1 # via uvicorn python-json-logger==2.0.7 # via jupyter-events +python-multipart==0.0.9 + # via fastapi pytz==2024.1 # via # great-expectations @@ -724,33 +658,29 @@ pytz==2024.1 pyyaml==6.0.1 # via # dask - # feast (setup.py) # ibis-substrait # jupyter-events # kubernetes # pre-commit # responses # uvicorn -pyzmq==26.0.2 +pyzmq==26.0.3 # via # ipykernel # jupyter-client # jupyter-server redis==4.6.0 - # via feast (setup.py) -referencing==0.34.0 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications # jupyter-events -regex==2024.4.16 - # via feast (setup.py) +regex==2024.4.28 requests==2.31.0 # via # azure-core # cachecontrol # docker - # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-storage @@ -777,10 +707,11 @@ rfc3986-validator==0.1.1 # jsonschema # jupyter-events rich==13.7.1 - # via ibis-framework -rockset==2.1.1 - # via feast (setup.py) -rpds-py==0.18.0 + # via + # ibis-framework + # typer +rockset==2.1.2 +rpds-py==0.18.1 # via # jsonschema # referencing @@ -790,14 +721,21 @@ ruamel-yaml==0.17.17 # via great-expectations ruamel-yaml-clib==0.2.8 # via ruamel-yaml -ruff==0.4.1 - # via feast (setup.py) +ruff==0.4.3 s3transfer==0.10.1 # via boto3 scipy==1.13.0 # via great-expectations send2trash==1.8.3 # via jupyter-server +setuptools==69.5.1 + # via + # grpcio-tools + # kubernetes + # nodeenv + # pip-tools +shellingham==1.5.4 + # via typer six==1.16.0 # via # asttokens @@ -817,14 +755,12 @@ sniffio==1.3.1 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.9.0 - # via feast (setup.py) +snowflake-connector-python[pandas]==3.10.0 sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 # via beautifulsoup4 sphinx==6.2.1 - # via 
feast (setup.py) sphinxcontrib-applehelp==1.0.8 # via sphinx sphinxcontrib-devhelp==1.0.6 @@ -837,12 +773,10 @@ sphinxcontrib-qthelp==1.0.7 # via sphinx sphinxcontrib-serializinghtml==1.1.10 # via sphinx -sqlalchemy[mypy]==2.0.29 +sqlalchemy[mypy]==2.0.30 # via # duckdb-engine - # feast (setup.py) # ibis-framework - # sqlalchemy # sqlalchemy-views sqlalchemy-views==0.3.2 # via ibis-framework @@ -852,24 +786,20 @@ stack-data==0.6.3 # via ipython starlette==0.37.2 # via fastapi -substrait==0.16.0 +substrait==0.17.0 # via ibis-substrait tabulate==0.9.0 - # via feast (setup.py) -tenacity==8.2.3 - # via feast (setup.py) +tenacity==8.3.0 terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals -testcontainers==4.3.3 - # via feast (setup.py) -thriftpy2==0.4.20 +testcontainers==4.4.0 +thriftpy2==0.5.0 # via happybase -tinycss2==1.2.1 +tinycss2==1.3.0 # via nbconvert toml==0.10.2 - # via feast (setup.py) tomli==2.0.1 # via # build @@ -877,7 +807,6 @@ tomli==2.0.1 # jupyterlab # mypy # pip-tools - # pyproject-hooks # pytest # pytest-env tomlkit==0.12.4 @@ -896,10 +825,8 @@ tornado==6.4 # jupyterlab # notebook # terminado -tqdm==4.66.2 - # via - # feast (setup.py) - # great-expectations +tqdm==4.66.4 + # via great-expectations traitlets==5.14.3 # via # comm @@ -916,37 +843,25 @@ traitlets==5.14.3 # nbconvert # nbformat trino==0.328.0 - # via feast (setup.py) typeguard==4.2.1 - # via feast (setup.py) +typer==0.12.3 + # via fastapi-cli types-cffi==1.16.0.20240331 # via types-pyopenssl types-protobuf==3.19.22 - # via - # feast (setup.py) - # mypy-protobuf -types-pymysql==1.1.0.1 - # via feast (setup.py) -types-pyopenssl==24.0.0.20240417 + # via mypy-protobuf +types-pymysql==1.1.0.20240425 +types-pyopenssl==24.1.0.20240425 # via types-redis types-python-dateutil==2.9.0.20240316 - # via - # arrow - # feast (setup.py) + # via arrow types-pytz==2024.1.0.20240417 - # via feast (setup.py) types-pyyaml==6.0.12.20240311 - # via feast (setup.py) -types-redis==4.6.0.20240417 - # via feast (setup.py) +types-redis==4.6.0.20240425 types-requests==2.30.0.0 - # via feast (setup.py) -types-setuptools==69.5.0.20240415 - # via - # feast (setup.py) - # types-cffi +types-setuptools==69.5.0.20240423 + # via types-cffi types-tabulate==0.9.0.20240106 - # via feast (setup.py) types-urllib3==1.26.25.14 # via types-requests typing-extensions==4.11.0 @@ -967,6 +882,7 @@ typing-extensions==4.11.0 # starlette # testcontainers # typeguard + # typer # uvicorn tzdata==2024.1 # via pandas @@ -974,6 +890,8 @@ tzlocal==5.2 # via # great-expectations # trino +ujson==5.9.0 + # via fastapi uri-template==1.3.0 # via jsonschema uritemplate==4.1.1 @@ -982,7 +900,6 @@ urllib3==1.26.18 # via # botocore # docker - # feast (setup.py) # great-expectations # kubernetes # minio @@ -992,13 +909,13 @@ urllib3==1.26.18 # snowflake-connector-python # testcontainers uvicorn[standard]==0.29.0 - # via feast (setup.py) + # via + # fastapi + # fastapi-cli uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 - # via - # feast (setup.py) - # pre-commit + # via pre-commit watchfiles==0.21.0 # via uvicorn wcwidth==0.2.13 @@ -1009,13 +926,13 @@ webencodings==0.5.1 # via # bleach # tinycss2 -websocket-client==1.7.0 +websocket-client==1.8.0 # via # jupyter-server # kubernetes websockets==12.0 # via uvicorn -werkzeug==3.0.2 +werkzeug==3.0.3 # via moto wheel==0.43.0 # via pip-tools @@ -1027,7 +944,3 @@ xmltodict==0.13.0 # via moto zipp==3.18.1 # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -# pip -# 
setuptools diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 368b242126..1092aac9d0 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -1,13 +1,10 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --output-file=sdk/python/requirements/py3.9-requirements.txt -# +# This file was autogenerated by uv via the following command: +# uv pip compile --system --no-strip-extras setup.py --output-file sdk/python/requirements/py3.9-requirements.txt annotated-types==0.6.0 # via pydantic anyio==4.3.0 # via + # httpx # starlette # watchfiles attrs==23.2.0 @@ -15,72 +12,86 @@ attrs==23.2.0 # jsonschema # referencing certifi==2024.2.2 - # via requests + # via + # httpcore + # httpx + # requests charset-normalizer==3.3.2 # via requests click==8.1.7 # via # dask - # feast (setup.py) + # typer # uvicorn cloudpickle==3.0.0 # via dask colorama==0.4.6 - # via feast (setup.py) -dask[array,dataframe]==2024.4.2 - # via - # dask-expr - # feast (setup.py) -dask-expr==1.0.12 +dask[dataframe]==2024.5.0 + # via dask-expr +dask-expr==1.1.0 # via dask dill==0.3.8 - # via feast (setup.py) +dnspython==2.6.1 + # via email-validator +email-validator==2.1.1 + # via fastapi exceptiongroup==1.2.1 # via anyio -fastapi==0.110.2 - # via feast (setup.py) +fastapi==0.111.0 + # via fastapi-cli +fastapi-cli==0.0.2 + # via fastapi fsspec==2024.3.1 # via dask greenlet==3.0.3 # via sqlalchemy -gunicorn==22.0.0 ; platform_system != "Windows" - # via feast (setup.py) +gunicorn==22.0.0 h11==0.14.0 - # via uvicorn + # via + # httpcore + # uvicorn +httpcore==1.0.5 + # via httpx httptools==0.6.1 # via uvicorn +httpx==0.27.0 + # via fastapi idna==3.7 # via # anyio + # email-validator + # httpx # requests importlib-metadata==7.1.0 # via # dask # typeguard -jinja2==3.1.3 - # via feast (setup.py) -jsonschema==4.21.1 - # via feast (setup.py) +jinja2==3.1.4 + # via fastapi +jsonschema==4.22.0 jsonschema-specifications==2023.12.1 # via jsonschema locket==1.0.0 # via partd +markdown-it-py==3.0.0 + # via rich markupsafe==2.1.5 # via jinja2 +mdurl==0.1.2 + # via markdown-it-py mmh3==4.1.0 - # via feast (setup.py) -mypy==1.9.0 +mypy==1.10.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.6.0 - # via feast (setup.py) numpy==1.26.4 # via # dask - # feast (setup.py) # pandas # pyarrow +orjson==3.10.3 + # via fastapi packaging==24.0 # via # dask @@ -89,73 +100,66 @@ pandas==2.2.2 # via # dask # dask-expr - # feast (setup.py) -partd==1.4.1 +partd==1.4.2 # via dask protobuf==4.25.3 - # via - # feast (setup.py) - # mypy-protobuf -pyarrow==15.0.2 - # via - # dask-expr - # feast (setup.py) -pydantic==2.7.0 - # via - # fastapi - # feast (setup.py) -pydantic-core==2.18.1 + # via mypy-protobuf +pyarrow==16.0.0 + # via dask-expr +pydantic==2.7.1 + # via fastapi +pydantic-core==2.18.2 # via pydantic -pygments==2.17.2 - # via feast (setup.py) +pygments==2.18.0 + # via rich python-dateutil==2.9.0.post0 # via pandas python-dotenv==1.0.1 # via uvicorn +python-multipart==0.0.9 + # via fastapi pytz==2024.1 # via pandas pyyaml==6.0.1 # via # dask - # feast (setup.py) # uvicorn -referencing==0.34.0 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications requests==2.31.0 - # via feast (setup.py) -rpds-py==0.18.0 +rich==13.7.1 + # via typer +rpds-py==0.18.1 # via # jsonschema # referencing +shellingham==1.5.4 + # via typer six==1.16.0 # via python-dateutil 
sniffio==1.3.1 - # via anyio -sqlalchemy[mypy]==2.0.29 # via - # feast (setup.py) - # sqlalchemy + # anyio + # httpx +sqlalchemy[mypy]==2.0.30 starlette==0.37.2 # via fastapi tabulate==0.9.0 - # via feast (setup.py) -tenacity==8.2.3 - # via feast (setup.py) +tenacity==8.3.0 toml==0.10.2 - # via feast (setup.py) tomli==2.0.1 # via mypy toolz==0.12.1 # via # dask # partd -tqdm==4.66.2 - # via feast (setup.py) +tqdm==4.66.4 typeguard==4.2.1 - # via feast (setup.py) -types-protobuf==5.26.0.20240420 +typer==0.12.3 + # via fastapi-cli +types-protobuf==5.26.0.20240422 # via mypy-protobuf typing-extensions==4.11.0 # via @@ -167,13 +171,18 @@ typing-extensions==4.11.0 # sqlalchemy # starlette # typeguard + # typer # uvicorn tzdata==2024.1 # via pandas +ujson==5.9.0 + # via fastapi urllib3==2.2.1 # via requests uvicorn[standard]==0.29.0 - # via feast (setup.py) + # via + # fastapi + # fastapi-cli uvloop==0.19.0 # via uvicorn watchfiles==0.21.0 @@ -181,4 +190,4 @@ watchfiles==0.21.0 websockets==12.0 # via uvicorn zipp==3.18.1 - # via importlib-metadata + # via importlib-metadata \ No newline at end of file diff --git a/sdk/python/tests/foo_provider.py b/sdk/python/tests/foo_provider.py index 2a830d424c..3b9146a7b9 100644 --- a/sdk/python/tests/foo_provider.py +++ b/sdk/python/tests/foo_provider.py @@ -82,6 +82,15 @@ def online_read( ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: return [] + async def online_read_async( + self, + config: RepoConfig, + table: FeatureView, + entity_keys: List[EntityKeyProto], + requested_features: Optional[List[str]] = None, + ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: + return [] + def retrieve_saved_dataset(self, config: RepoConfig, dataset: SavedDataset): pass @@ -111,6 +120,7 @@ def retrieve_online_documents( requested_feature: str, query: List[float], top_k: int, + distance_metric: Optional[str] = None, ) -> List[ Tuple[ Optional[datetime], diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 096744f547..4007106a06 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -461,7 +461,7 @@ def construct_test_environment( test_repo_config.python_feature_server and test_repo_config.provider == "aws" ) or test_repo_config.registry_location == RegistryLocation.S3: aws_registry_path = os.getenv( - "AWS_REGISTRY_PATH", "s3://feast-integration-tests/registries" + "AWS_REGISTRY_PATH", "s3://feast-int-bucket/registries" ) registry: Union[str, RegistryConfig] = ( f"{aws_registry_path}/{project}/registry.db" diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py index 60fb8950a9..8fe933fbba 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py @@ -30,16 +30,17 @@ def __init__(self, project_name: str, *args, **kwargs): self.s3 = aws_utils.get_s3_resource(os.getenv("AWS_REGION", "us-west-2")) self.offline_store_config = RedshiftOfflineStoreConfig( - cluster_id=os.getenv("AWS_CLUSTER_ID", "feast-integration-tests"), + cluster_id=os.getenv("AWS_CLUSTER_ID", "feast-int-bucket"), region=os.getenv("AWS_REGION", "us-west-2"), user=os.getenv("AWS_USER", "admin"), database=os.getenv("AWS_DB", "feast"), 
s3_staging_location=os.getenv( "AWS_STAGING_LOCATION", - "s3://feast-integration-tests/redshift/tests/ingestion", + "s3://feast-int-bucket/redshift/tests/ingestion", ), iam_role=os.getenv( - "AWS_IAM_ROLE", "arn:aws:iam::402087665549:role/redshift_s3_access_role" + "AWS_IAM_ROLE", + "arn:aws:iam::585132637328:role/service-role/AmazonRedshift-CommandsAccessRole-20240403T092631", ), workgroup="", ) diff --git a/sdk/python/tests/integration/feature_repos/universal/online_store/elasticsearch.py b/sdk/python/tests/integration/feature_repos/universal/online_store/elasticsearch.py new file mode 100644 index 0000000000..c62a9009ca --- /dev/null +++ b/sdk/python/tests/integration/feature_repos/universal/online_store/elasticsearch.py @@ -0,0 +1,28 @@ +from typing import Dict + +from testcontainers.elasticsearch import ElasticSearchContainer + +from tests.integration.feature_repos.universal.online_store_creator import ( + OnlineStoreCreator, +) + + +class ElasticSearchOnlineStoreCreator(OnlineStoreCreator): + def __init__(self, project_name: str, **kwargs): + super().__init__(project_name) + self.container = ElasticSearchContainer( + "elasticsearch:8.3.3", + ).with_exposed_ports(9200) + + def create_online_store(self) -> Dict[str, str]: + self.container.start() + return { + "host": "localhost", + "type": "elasticsearch", + "port": self.container.get_exposed_port(9200), + "vector_len": 2, + "similarity": "cosine", + } + + def teardown(self): + self.container.stop() diff --git a/sdk/python/tests/integration/materialization/test_universal_materialization.py b/sdk/python/tests/integration/materialization/test_universal_materialization.py new file mode 100644 index 0000000000..37030b1bb3 --- /dev/null +++ b/sdk/python/tests/integration/materialization/test_universal_materialization.py @@ -0,0 +1,45 @@ +from datetime import timedelta + +import pytest + +from feast.entity import Entity +from feast.feature_view import FeatureView +from feast.field import Field +from feast.types import Float32 +from tests.data.data_creator import create_basic_driver_dataset +from tests.utils.e2e_test_validation import validate_offline_online_store_consistency + + +@pytest.mark.integration +@pytest.mark.universal_offline_stores +def test_universal_materialization_consistency(environment): + fs = environment.feature_store + + df = create_basic_driver_dataset() + + ds = environment.data_source_creator.create_data_source( + df, + fs.project, + field_mapping={"ts_1": "ts"}, + ) + + driver = Entity( + name="driver_id", + join_keys=["driver_id"], + ) + + driver_stats_fv = FeatureView( + name="driver_hourly_stats", + entities=[driver], + ttl=timedelta(weeks=52), + schema=[Field(name="value", dtype=Float32)], + source=ds, + ) + + fs.apply([driver, driver_stats_fv]) + + # materialization is run in two steps and + # we use timestamp from generated dataframe as a split point + split_dt = df["ts_1"][4].to_pydatetime() - timedelta(seconds=1) + + validate_offline_online_store_consistency(fs, driver_stats_fv, split_dt) diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py index 3ae7be9e1e..9beba4d72b 100644 --- a/sdk/python/tests/integration/online_store/test_universal_online.py +++ b/sdk/python/tests/integration/online_store/test_universal_online.py @@ -789,7 +789,7 @@ def assert_feature_service_entity_mapping_correctness( @pytest.mark.integration -@pytest.mark.universal_online_stores(only=["pgvector"]) 
+@pytest.mark.universal_online_stores(only=["pgvector", "elasticsearch"]) def test_retrieve_online_documents(environment, fake_document_data): fs = environment.feature_store df, data_source = fake_document_data @@ -798,6 +798,25 @@ def test_retrieve_online_documents(environment, fake_document_data): fs.write_to_online_store("item_embeddings", df) documents = fs.retrieve_online_documents( - feature="item_embeddings:embedding_float", query=[1.0, 2.0], top_k=2 + feature="item_embeddings:embedding_float", + query=[1.0, 2.0], + top_k=2, + distance_metric="L2", ).to_dict() assert len(documents["embedding_float"]) == 2 + + documents = fs.retrieve_online_documents( + feature="item_embeddings:embedding_float", + query=[1.0, 2.0], + top_k=2, + distance_metric="L1", + ).to_dict() + assert len(documents["embedding_float"]) == 2 + + with pytest.raises(ValueError): + fs.retrieve_online_documents( + feature="item_embeddings:embedding_float", + query=[1.0, 2.0], + top_k=2, + distance_metric="wrong", + ).to_dict() diff --git a/sdk/python/tests/integration/registration/test_feature_store.py b/sdk/python/tests/integration/registration/test_feature_store.py index deb1b0635f..bf0c2fb61f 100644 --- a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ -226,7 +226,7 @@ def feature_store_with_gcs_registry(): @pytest.fixture def feature_store_with_s3_registry(): aws_registry_path = os.getenv( - "AWS_REGISTRY_PATH", "s3://feast-integration-tests/registries" + "AWS_REGISTRY_PATH", "s3://feast-int-bucket/registries" ) return FeatureStore( config=RepoConfig( diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py index 232f035609..70d118ecf9 100644 --- a/sdk/python/tests/integration/registration/test_registry.py +++ b/sdk/python/tests/integration/registration/test_registry.py @@ -53,7 +53,7 @@ def gcs_registry() -> Registry: @pytest.fixture def s3_registry() -> Registry: aws_registry_path = os.getenv( - "AWS_REGISTRY_PATH", "s3://feast-integration-tests/registries" + "AWS_REGISTRY_PATH", "s3://feast-int-bucket/registries" ) registry_config = RegistryConfig( path=f"{aws_registry_path}/{int(time.time() * 1000)}/registry.db", diff --git a/sdk/python/tests/unit/diff/test_infra_diff.py b/sdk/python/tests/unit/diff/test_infra_diff.py index 8e3d5b765f..3a0443e634 100644 --- a/sdk/python/tests/unit/diff/test_infra_diff.py +++ b/sdk/python/tests/unit/diff/test_infra_diff.py @@ -39,10 +39,14 @@ def test_tag_infra_proto_objects_for_keep_delete_add(): def test_diff_between_datastore_tables(): pre_changed = DatastoreTable( - project="test", name="table", project_id="pre", namespace="pre" + project="test", name="table", project_id="pre", namespace="pre", database="pre" ).to_proto() post_changed = DatastoreTable( - project="test", name="table", project_id="post", namespace="post" + project="test", + name="table", + project_id="post", + namespace="post", + database="post", ).to_proto() infra_object_diff = diff_between(pre_changed, pre_changed, "datastore table") @@ -51,7 +55,7 @@ def test_diff_between_datastore_tables(): infra_object_diff = diff_between(pre_changed, post_changed, "datastore table") infra_object_property_diffs = infra_object_diff.infra_object_property_diffs - assert len(infra_object_property_diffs) == 2 + assert len(infra_object_property_diffs) == 3 assert infra_object_property_diffs[0].property_name == "project_id" assert 
infra_object_property_diffs[0].val_existing == wrappers.StringValue( @@ -67,6 +71,13 @@ def test_diff_between_datastore_tables(): assert infra_object_property_diffs[1].val_declared == wrappers.StringValue( value="post" ) + assert infra_object_property_diffs[2].property_name == "database" + assert infra_object_property_diffs[2].val_existing == wrappers.StringValue( + value="pre" + ) + assert infra_object_property_diffs[2].val_declared == wrappers.StringValue( + value="post" + ) def test_diff_infra_protos(): diff --git a/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py b/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py index 8ff19d03c1..48b5ddb59b 100644 --- a/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py +++ b/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py @@ -117,12 +117,12 @@ def retrieval_job(request, environment): return FileRetrievalJob(lambda: 1, full_feature_names=False) elif request.param is RedshiftRetrievalJob: offline_store_config = RedshiftOfflineStoreConfig( - cluster_id="feast-integration-tests", + cluster_id="feast-int-bucket", region="us-west-2", user="admin", database="feast", - s3_staging_location="s3://feast-integration-tests/redshift/tests/ingestion", - iam_role="arn:aws:iam::402087665549:role/redshift_s3_access_role", + s3_staging_location="s3://feast-int-bucket/redshift/tests/ingestion", + iam_role="arn:aws:iam::585132637328:role/service-role/AmazonRedshift-CommandsAccessRole-20240403T092631", workgroup="", ) environment.test_repo_config.offline_store = offline_store_config diff --git a/sdk/python/tests/unit/infra/online_store/test_redis.py b/sdk/python/tests/unit/infra/online_store/test_redis.py new file mode 100644 index 0000000000..c26c2f25c5 --- /dev/null +++ b/sdk/python/tests/unit/infra/online_store/test_redis.py @@ -0,0 +1,130 @@ +import pytest +from google.protobuf.timestamp_pb2 import Timestamp + +from feast import Entity, FeatureView, Field, FileSource, RepoConfig +from feast.infra.online_stores.redis import RedisOnlineStore +from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import Value as ValueProto +from feast.types import Int32 + + +@pytest.fixture +def redis_online_store() -> RedisOnlineStore: + return RedisOnlineStore() + + +@pytest.fixture +def repo_config(): + return RepoConfig( + provider="local", + project="test", + entity_key_serialization_version=2, + registry="dummy_registry.db", + ) + + +@pytest.fixture +def feature_view(): + file_source = FileSource(name="my_file_source", path="test.parquet") + entity = Entity(name="entity", join_keys=["entity"]) + feature_view = FeatureView( + name="feature_view_1", + entities=[entity], + schema=[ + Field(name="feature_10", dtype=Int32), + Field(name="feature_11", dtype=Int32), + Field(name="feature_12", dtype=Int32), + ], + source=file_source, + ) + return feature_view + + +def test_generate_entity_redis_keys(redis_online_store: RedisOnlineStore, repo_config): + entity_keys = [ + EntityKeyProto(join_keys=["entity"], entity_values=[ValueProto(int32_val=1)]), + ] + + actual = redis_online_store._generate_redis_keys_for_entities( + repo_config, entity_keys + ) + expected = [ + b"\x02\x00\x00\x00entity\x03\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00test" + ] + assert actual == expected + + +def test_generate_hset_keys_for_features( + redis_online_store: RedisOnlineStore, feature_view +): + actual = redis_online_store._generate_hset_keys_for_features(feature_view) + expected = 
( + ["feature_10", "feature_11", "feature_12", "_ts:feature_view_1"], + [b"&m_9", b"\xc37\x9a\xbf", b"wr\xb5d", "_ts:feature_view_1"], + ) + assert actual == expected + + +def test_generate_hset_keys_for_features_with_requested_features( + redis_online_store: RedisOnlineStore, feature_view +): + actual = redis_online_store._generate_hset_keys_for_features( + feature_view=feature_view, requested_features=["my-feature-view:feature1"] + ) + expected = ( + ["my-feature-view:feature1", "_ts:feature_view_1"], + [b"Si\x86J", "_ts:feature_view_1"], + ) + assert actual == expected + + +def test_convert_redis_values_to_protobuf( + redis_online_store: RedisOnlineStore, feature_view +): + requested_features = [ + "feature_view_1:feature_10", + "feature_view_1:feature_11", + "_ts:feature_view_1", + ] + values = [ + [ + ValueProto(int32_val=1).SerializeToString(), + ValueProto(int32_val=2).SerializeToString(), + Timestamp().SerializeToString(), + ] + ] + + features = redis_online_store._convert_redis_values_to_protobuf( + redis_values=values, + feature_view=feature_view.name, + requested_features=requested_features, + ) + assert isinstance(features, list) + assert len(features) == 1 + + timestamp, features = features[0] + assert features["feature_view_1:feature_10"].int32_val == 1 + assert features["feature_view_1:feature_11"].int32_val == 2 + + +def test_get_features_for_entity(redis_online_store: RedisOnlineStore, feature_view): + requested_features = [ + "feature_view_1:feature_10", + "feature_view_1:feature_11", + "_ts:feature_view_1", + ] + values = [ + ValueProto(int32_val=1).SerializeToString(), + ValueProto(int32_val=2).SerializeToString(), + Timestamp().SerializeToString(), + ] + + timestamp, features = redis_online_store._get_features_for_entity( + values=values, + feature_view=feature_view.name, + requested_features=requested_features, + ) + assert "feature_view_1:feature_10" in features + assert "feature_view_1:feature_11" in features + assert features["feature_view_1:feature_10"].int32_val == 1 + assert features["feature_view_1:feature_11"].int32_val == 2 diff --git a/setup.py b/setup.py index 08fe3353ab..e109ca8919 100644 --- a/setup.py +++ b/setup.py @@ -137,8 +137,8 @@ ] IBIS_REQUIRED = [ - "ibis-framework", - "ibis-substrait", + "ibis-framework>=8.0.0,<9", + "ibis-substrait<=3.2.0", ] GRPCIO_REQUIRED = [ @@ -148,10 +148,12 @@ "grpcio-health-checking>=1.56.2,<2", ] -DUCKDB_REQUIRED = ["ibis-framework[duckdb]"] +DUCKDB_REQUIRED = ["ibis-framework[duckdb]>=8.0.0,<9"] DELTA_REQUIRED = ["deltalake"] +ELASTICSEARCH_REQUIRED = ["elasticsearch>=8.13.0"] + CI_REQUIRED = ( [ "build", @@ -214,6 +216,7 @@ + DUCKDB_REQUIRED + DELTA_REQUIRED + MARIADB_REQUIRED + + ELASTICSEARCH_REQUIRED ) DOCS_REQUIRED = CI_REQUIRED @@ -381,6 +384,7 @@ def run(self): "ikv": IKV_REQUIRED, "delta": DELTA_REQUIRED, "mariadb": MARIADB_REQUIRED, + "elasticsearch": ELASTICSEARCH_REQUIRED, }, include_package_data=True, license="Apache",
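For context on the new distance_metric argument threaded through retrieve_online_documents above, the sketch below shows the call pattern end to end. It is a minimal sketch, not part of the patch: it assumes a repo already configured with one of the vector-capable online stores exercised by the updated test (pgvector or the new elasticsearch store) and an "item_embeddings" feature view exposing an "embedding_float" field; repo_path is a placeholder.

from feast import FeatureStore

# Assumes an existing repo with a vector-capable online store and an
# "item_embeddings" feature view, mirroring test_retrieve_online_documents.
store = FeatureStore(repo_path=".")

# "L2" and "L1" are the metrics exercised by the updated test; any other
# value is expected to raise ValueError.
docs = store.retrieve_online_documents(
    feature="item_embeddings:embedding_float",
    query=[1.0, 2.0],
    top_k=2,
    distance_metric="L2",
).to_dict()
assert len(docs["embedding_float"]) == 2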
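The online_read_async hook added to FooProvider above is a no-op test double; a custom provider would override the same signature. A minimal sketch under stated assumptions: the Provider base class import path is assumed, the proto imports match those used in the new Redis tests, and the other abstract methods a real provider must implement are omitted.

from datetime import datetime
from typing import Dict, List, Optional, Tuple

from feast import FeatureView, RepoConfig
from feast.infra.provider import Provider  # assumed import path
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
from feast.protos.feast.types.Value_pb2 import Value as ValueProto


class MyAsyncProvider(Provider):
    # Remaining abstract Provider methods are omitted for brevity; a real
    # implementation would read from its online store without blocking.
    async def online_read_async(
        self,
        config: RepoConfig,
        table: FeatureView,
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        return []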
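And a sketch of standing up a disposable Elasticsearch instance the same way the new ElasticSearchOnlineStoreCreator does, for trying the online store locally. It assumes Docker is available and Feast is installed with the new extra (pip install "feast[elasticsearch]"); the config keys simply mirror the dict returned by the creator.

from testcontainers.elasticsearch import ElasticSearchContainer

container = ElasticSearchContainer("elasticsearch:8.3.3").with_exposed_ports(9200)
container.start()
try:
    # Same shape as ElasticSearchOnlineStoreCreator.create_online_store().
    online_store_config = {
        "type": "elasticsearch",
        "host": "localhost",
        "port": container.get_exposed_port(9200),
        "vector_len": 2,
        "similarity": "cosine",
    }
    print(online_store_config)
finally:
    container.stop()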