diff --git a/.github/labeler.yml b/.github/labeler.yml index 5c1e12fa8..e1e9727f6 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,43 +1,53 @@ # Integrations integration:chroma: -- changed-files: - - any-glob-to-any-file: 'integrations/chroma/**/*' + - changed-files: + - any-glob-to-any-file: "integrations/chroma/**/*" integration:cohere: -- changed-files: - - any-glob-to-any-file: 'integrations/cohere/**/*' + - changed-files: + - any-glob-to-any-file: "integrations/cohere/**/*" integration:elasticsearch: -- changed-files: - - any-glob-to-any-file: 'integrations/elasticsearch/**/*' + - changed-files: + - any-glob-to-any-file: "integrations/elasticsearch/**/*" + +integration:google-vertex: + - changed-files: + - any-glob-to-any-file: "integrations/google-vertex/**/*" integration:gradient: -- changed-files: - - any-glob-to-any-file: 'integrations/gradient/**/*' + - changed-files: + - any-glob-to-any-file: "integrations/gradient/**/*" integration:instructor-embedders: -- changed-files: - - any-glob-to-any-file: 'integrations/instructor-embedders/**/*' + - changed-files: + - any-glob-to-any-file: "integrations/instructor-embedders/**/*" integration:jina: -- changed-files: - - any-glob-to-any-file: 'integrations/jina/**/*' + - changed-files: + - any-glob-to-any-file: "integrations/jina/**/*" integration:opensearch: -- changed-files: - - any-glob-to-any-file: 'integrations/opensearch/**/*' + - changed-files: + - any-glob-to-any-file: "integrations/opensearch/**/*" integration:unstructured-fileconverter: -- changed-files: - - any-glob-to-any-file: 'integrations/unstructured/fileconverter/**/*' + - changed-files: + - any-glob-to-any-file: "integrations/unstructured/fileconverter/**/*" # Topics topic:CI: -- changed-files: - - any-glob-to-any-file: ['.github/*', '.github/**/*'] + - changed-files: + - any-glob-to-any-file: [".github/*", ".github/**/*"] topic:DX: -- changed-files: - - any-glob-to-any-file: ['CONTRIBUTING.md', '.pre-commit-config.yaml', '.gitignore', 'requirements.txt'] + - changed-files: + - any-glob-to-any-file: + [ + "CONTRIBUTING.md", + ".pre-commit-config.yaml", + ".gitignore", + "requirements.txt", + ] topic:security: -- changed-files: - - any-glob-to-any-file: ['SECURITY.md'] + - changed-files: + - any-glob-to-any-file: ["SECURITY.md"] diff --git a/.github/workflows/google_vertex.yml b/.github/workflows/google_vertex.yml new file mode 100644 index 000000000..7be3973bf --- /dev/null +++ b/.github/workflows/google_vertex.yml @@ -0,0 +1,56 @@ +# This workflow comes from https://github.com/ofek/hatch-mypyc +# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml +name: Test / google-vertex + +on: + schedule: + - cron: "0 0 * * *" + pull_request: + paths: + - "integrations/google-vertex/**" + - ".github/workflows/google_vertex.yml" + +defaults: + run: + working-directory: integrations/google-vertex + +concurrency: + group: google-vertex-${{ github.head_ref }} + cancel-in-progress: true + +env: + PYTHONUNBUFFERED: "1" + FORCE_COLOR: "1" + +jobs: + run: + name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + python-version: ["3.9", "3.10"] + + steps: + - name: Support longpaths + if: matrix.os == 'windows-latest' + working-directory: . 
+ run: git config --system core.longpaths true + + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Hatch + run: pip install --upgrade hatch + + - name: Lint + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run lint:all + + - name: Run tests + run: hatch run cov diff --git a/README.md b/README.md index 024945b1c..395f59db4 100644 --- a/README.md +++ b/README.md @@ -11,16 +11,19 @@ You will need `hatch` to work on or create new integrations. Run `pip install ha All the integrations are self contained, so the first step before working on one is to `cd` into the proper folder. For example, to work on the Chroma Document Store, from the root of the repo: + ```sh $ cd integrations/chroma ``` From there, you can run the tests with `hatch`, that will take care of setting up an isolated Python environment: + ```sh hatch run test ``` Similarly, to run the linters: + ```sh hatch run lint:all ``` @@ -31,11 +34,13 @@ hatch run lint:all > you're integrating Haystack with. For example, a deepset integration would be named as `deepset-haystack`. To create a new integration, from the root of the repo change directory into `integrations`: + ```sh cd integrations ``` From there, use `hatch` to create the scaffold of the new integration: + ```sh $ hatch --config hatch.toml new -i Project name: deepset-haystack @@ -58,10 +63,11 @@ deepset-haystack | Package | Type | PyPi Package | Status | | ------------------------------------------------------------------------------- | ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | [chroma-haystack](integrations/chroma/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/chroma-haystack.svg)](https://pypi.org/project/chroma-haystack) | [![Test / chroma](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/chroma.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/chroma.yml) | -| [cohere-haystack](integrations/cohere/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/cohere-haystack.svg)](https://pypi.org/project/cohere-haystack) | [![Test / cohere](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cohere.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cohere.yml) | +| [cohere-haystack](integrations/cohere/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/cohere-haystack.svg)](https://pypi.org/project/cohere-haystack) | [![Test / cohere](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cohere.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cohere.yml) | | [elasticsearch-haystack](integrations/elasticsearch/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/elasticsearch-haystack.svg)](https://pypi.org/project/elasticsearch-haystack) | [![Test / 
elasticsearch](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml) | +| [google-vertex-haystack](integrations/google-vertex/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/google-vertex-haystack.svg)](https://pypi.org/project/google-vertex-haystack) | [![Test / google-vertex](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google-vertex.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google-vertex.yml) | | [gradient-haystack](integrations/gradient/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/gradient-haystack.svg)](https://pypi.org/project/gradient-haystack) | [![Test / gradient](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/gradient.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/gradient.yml) | | [instructor-embedders-haystack](integrations/instructor-embedders/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/instructor-embedders-haystack.svg)](https://pypi.org/project/instructor-embedders-haystack) | [![Test / instructor-embedders](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml) | | [opensearch-haystack](integrations/opensearch/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/opensearch-haystack.svg)](https://pypi.org/project/opensearch-haystack) | [![Test / opensearch](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/opensearch.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/opensearch.yml) | | [unstructured-fileconverter-haystack](integrations/unstructured/fileconverter/) | File converter | [![PyPI - Version](https://img.shields.io/pypi/v/unstructured-fileconverter-haystack.svg)](https://pypi.org/project/unstructured-fileconverter-haystack) | [![Test / unstructured / fileconverter](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/unstructured_fileconverter.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/unstructured_fileconverter.yml) | -| [jina-haystack](integrations/jina/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/jina-haystack.svg)](https://pypi.org/project/jina-haystack) | [![Test / cohere](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml) | +| [jina-haystack](integrations/jina/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/jina-haystack.svg)](https://pypi.org/project/jina-haystack) | [![Test / cohere](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml) | diff --git a/integrations/google-vertex/LICENSE.txt b/integrations/google-vertex/LICENSE.txt new file mode 100644 index 000000000..6134ab324 --- /dev/null +++ b/integrations/google-vertex/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. 
Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2023-present deepset GmbH + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/integrations/google-vertex/README.md b/integrations/google-vertex/README.md new file mode 100644 index 000000000..17a445c02 --- /dev/null +++ b/integrations/google-vertex/README.md @@ -0,0 +1,43 @@ +# google-vertex-haystack + +[![PyPI - Version](https://img.shields.io/pypi/v/google-vertex-haystack.svg)](https://pypi.org/project/google-vertex-haystack) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/google-vertex-haystack.svg)](https://pypi.org/project/google-vertex-haystack) + +--- + +**Table of Contents** + +- [google-vertex-haystack](#google-vertex-haystack) + - [Installation](#installation) + - [Contributing](#contributing) + - [License](#license) + +## Installation + +```console +pip install google-vertex-haystack +``` + +## Contributing + +`hatch` is the best way to interact with this project, to install it: + +```sh +pip install hatch +``` + +With `hatch` installed, to run all the tests: + +``` +hatch run test +``` + +To run the linters `ruff` and `mypy`: + +``` +hatch run lint:all +``` + +## License + +`google-vertex-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. diff --git a/integrations/google-vertex/pyproject.toml b/integrations/google-vertex/pyproject.toml new file mode 100644 index 000000000..2455b4fa9 --- /dev/null +++ b/integrations/google-vertex/pyproject.toml @@ -0,0 +1,174 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "google-vertex-haystack" +dynamic = ["version"] +description = '' +readme = "README.md" +requires-python = ">=3.7" +license = "Apache-2.0" +keywords = [] +authors = [ + { name = "deepset GmbH", email = "info@deepset.ai" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [ + "haystack-ai", + "google-cloud-aiplatform", +] + +[project.urls] +Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/google-vertex#readme" +Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" +Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/google-vertex" + +[tool.hatch.version] +path = "src/google_vertex_haystack/__about__.py" + +[tool.hatch.envs.default] +dependencies = [ + "coverage[toml]>=6.5", + "pytest", +] +[tool.hatch.envs.default.scripts] +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +cov-report = [ + "- coverage combine", + "coverage report", +] +cov = [ + "test-cov", + "cov-report", +] + +[[tool.hatch.envs.all.matrix]] +python = ["3.7", "3.8", "3.9", "3.10", "3.11"] + +[tool.hatch.envs.lint] +detached = true +dependencies = [ + "black>=23.1.0", + "mypy>=1.0.0", + "ruff>=0.0.243", +] +[tool.hatch.envs.lint.scripts] +typing = "mypy --install-types --non-interactive {args:src/google_vertex_haystack tests}" +style = [ + "ruff {args:.}", + "black --check --diff {args:.}", +] +fmt = [ + "black {args:.}", + "ruff --fix {args:.}", + "style", +] +all = [ + "style", + "typing", +] + +[tool.black] +target-version = ["py37"] +line-length = 120 +skip-string-normalization = true + +[tool.ruff] +target-version = "py37" +line-length = 120 +select 
= [ + "A", + "ARG", + "B", + "C", + "DTZ", + "E", + "EM", + "F", + "I", + "ICN", + "ISC", + "N", + "PLC", + "PLE", + "PLR", + "PLW", + "Q", + "RUF", + "S", + "T", + "TID", + "UP", + "W", + "YTT", +] +ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Ignore checks for possible passwords + "S105", "S106", "S107", + # Ignore complexity + "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915", +] +unfixable = [ + # Don't touch unused imports + "F401", +] + +[tool.ruff.isort] +known-first-party = ["google_vertex_haystack"] + +[tool.ruff.flake8-tidy-imports] +ban-relative-imports = "all" + +[tool.ruff.per-file-ignores] +# Tests can use magic values, assertions, and relative imports +"tests/**/*" = ["PLR2004", "S101", "TID252"] + +[tool.coverage.run] +source_pkgs = ["google_vertex_haystack", "tests"] +branch = true +parallel = true +omit = [ + "src/google_vertex_haystack/__about__.py", +] + +[tool.coverage.paths] +google_vertex_haystack = ["src/google_vertex_haystack", "*/google-vertex-haystack/src/google_vertex_haystack"] +tests = ["tests", "*/google-vertex-haystack/tests"] + +[tool.coverage.report] +exclude_lines = [ + "no cov", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] + +[[tool.mypy.overrides]] +module = [ + "vertexai.*", + "haystack.*", + "pytest.*", + "numpy.*", +] +ignore_missing_imports = true + +[tool.pytest.ini_options] +addopts = "--strict-markers" +markers = [ + "integration: integration tests", + "embedders: embedders tests", + "generators: generators tests", +] +log_cli = true \ No newline at end of file diff --git a/integrations/google-vertex/src/google_vertex_haystack/__about__.py b/integrations/google-vertex/src/google_vertex_haystack/__about__.py new file mode 100644 index 000000000..0e4fa27cf --- /dev/null +++ b/integrations/google-vertex/src/google_vertex_haystack/__about__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +__version__ = "0.0.1" diff --git a/integrations/google-vertex/src/google_vertex_haystack/__init__.py b/integrations/google-vertex/src/google_vertex_haystack/__init__.py new file mode 100644 index 000000000..e873bc332 --- /dev/null +++ b/integrations/google-vertex/src/google_vertex_haystack/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/google-vertex/src/google_vertex_haystack/generators/__init__.py b/integrations/google-vertex/src/google_vertex_haystack/generators/__init__.py new file mode 100644 index 000000000..e873bc332 --- /dev/null +++ b/integrations/google-vertex/src/google_vertex_haystack/generators/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/google-vertex/src/google_vertex_haystack/generators/captioner.py b/integrations/google-vertex/src/google_vertex_haystack/generators/captioner.py new file mode 100644 index 000000000..83322b33b --- /dev/null +++ b/integrations/google-vertex/src/google_vertex_haystack/generators/captioner.py @@ -0,0 +1,53 @@ +import logging +from typing import Any, Dict, List, Optional + +import vertexai +from haystack.core.component import component +from haystack.core.serialization import default_from_dict, default_to_dict +from haystack.dataclasses.byte_stream import ByteStream +from vertexai.vision_models import Image, ImageTextModel + +logger = logging.getLogger(__name__) + + +@component +class 
VertexAIImageCaptioner: + def __init__(self, *, model: str = "imagetext", project_id: str, location: Optional[str] = None, **kwargs): + """ + Generate image captions using a Google Vertex AI model. + + Authenticates using Google Cloud Application Default Credentials (ADCs). + For more information see the official Google documentation: + https://cloud.google.com/docs/authentication/provide-credentials-adc + + :param project_id: ID of the GCP project to use. + :param model: Name of the model to use, defaults to "imagetext". + :param location: The default location to use when making API calls, if not set uses us-central-1. + Defaults to None. + :param kwargs: Additional keyword arguments to pass to the model. + For a list of supported arguments see the `ImageTextModel.get_captions()` documentation. + """ + + # Login to GCP. This will fail if user has not set up their gcloud SDK + vertexai.init(project=project_id, location=location) + + self._model_name = model + self._project_id = project_id + self._location = location + self._kwargs = kwargs + + self._model = ImageTextModel.from_pretrained(self._model_name) + + def to_dict(self) -> Dict[str, Any]: + return default_to_dict( + self, model=self._model_name, project_id=self._project_id, location=self._location, **self._kwargs + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "VertexAIImageCaptioner": + return default_from_dict(cls, data) + + @component.output_types(captions=List[str]) + def run(self, image: ByteStream): + captions = self._model.get_captions(image=Image(image.data), **self._kwargs) + return {"captions": captions} diff --git a/integrations/google-vertex/src/google_vertex_haystack/generators/code_generator.py b/integrations/google-vertex/src/google_vertex_haystack/generators/code_generator.py new file mode 100644 index 000000000..1914af289 --- /dev/null +++ b/integrations/google-vertex/src/google_vertex_haystack/generators/code_generator.py @@ -0,0 +1,54 @@ +import logging +from typing import Any, Dict, List, Optional + +import vertexai +from haystack.core.component import component +from haystack.core.serialization import default_from_dict, default_to_dict +from vertexai.language_models import CodeGenerationModel + +logger = logging.getLogger(__name__) + + +@component +class VertexAICodeGenerator: + def __init__(self, *, model: str = "code-bison", project_id: str, location: Optional[str] = None, **kwargs): + """ + Generate code using a Google Vertex AI model. + + Authenticates using Google Cloud Application Default Credentials (ADCs). + For more information see the official Google documentation: + https://cloud.google.com/docs/authentication/provide-credentials-adc + + :param project_id: ID of the GCP project to use. + :param model: Name of the model to use, defaults to "code-bison". + :param location: The default location to use when making API calls, if not set uses us-central-1. + Defaults to None. + :param kwargs: Additional keyword arguments to pass to the model. + For a list of supported arguments see the `CodeGenerationModel.predict()` documentation. + """ + + # Login to GCP. 
This will fail if user has not set up their gcloud SDK + vertexai.init(project=project_id, location=location) + + self._model_name = model + self._project_id = project_id + self._location = location + self._kwargs = kwargs + + self._model = CodeGenerationModel.from_pretrained(self._model_name) + + def to_dict(self) -> Dict[str, Any]: + return default_to_dict( + self, model=self._model_name, project_id=self._project_id, location=self._location, **self._kwargs + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "VertexAICodeGenerator": + return default_from_dict(cls, data) + + @component.output_types(answers=List[str]) + def run(self, prefix: str, suffix: Optional[str] = None): + res = self._model.predict(prefix=prefix, suffix=suffix, **self._kwargs) + # Handle the case where the model returns multiple candidates + answers = [c.text for c in res.candidates] if hasattr(res, "candidates") else [res.text] + return {"answers": answers} diff --git a/integrations/google-vertex/src/google_vertex_haystack/generators/gemini.py b/integrations/google-vertex/src/google_vertex_haystack/generators/gemini.py new file mode 100644 index 000000000..b01dc6795 --- /dev/null +++ b/integrations/google-vertex/src/google_vertex_haystack/generators/gemini.py @@ -0,0 +1,105 @@ +import logging +from typing import Any, Dict, List, Optional, Union + +import vertexai +from haystack.core.component import component +from haystack.core.component.types import Variadic +from haystack.core.serialization import default_from_dict, default_to_dict +from haystack.dataclasses.byte_stream import ByteStream +from vertexai.preview.generative_models import ( + Content, + GenerativeModel, + Part, +) + +logger = logging.getLogger(__name__) + + +@component +class GeminiGenerator: + def __init__(self, *, model: str = "gemini-pro-vision", project_id: str, location: Optional[str] = None, **kwargs): + """ + Multi modal generator using Gemini model via Google Vertex AI. + + Authenticates using Google Cloud Application Default Credentials (ADCs). + For more information see the official Google documentation: + https://cloud.google.com/docs/authentication/provide-credentials-adc + + :param project_id: ID of the GCP project to use. + :param model: Name of the model to use, defaults to "gemini-pro-vision". + :param location: The default location to use when making API calls, if not set uses us-central-1. + Defaults to None. + :param kwargs: Additional keyword arguments to pass to the model. + For a list of supported arguments see the `GenerativeModel.generate_content()` documentation. + """ + + # Login to GCP. This will fail if user has not set up their gcloud SDK + vertexai.init(project=project_id, location=location) + + self._model_name = model + self._project_id = project_id + self._location = location + self._kwargs = kwargs + + if kwargs.get("stream"): + msg = "The `stream` parameter is not supported by the Gemini generator." 
+ raise ValueError(msg) + + self._model = GenerativeModel(self._model_name) + + def to_dict(self) -> Dict[str, Any]: + # TODO: This is not fully implemented yet + return default_to_dict( + self, model=self._model_name, project_id=self._project_id, location=self._location, **self._kwargs + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GeminiGenerator": + # TODO: This is not fully implemented yet + return default_from_dict(cls, data) + + def _convert_part(self, part: Union[str, ByteStream, Part]) -> Part: + if isinstance(part, str): + return Part.from_text(part) + elif isinstance(part, ByteStream): + return Part.from_data(part.data, part.mime_type) + elif isinstance(part, Part): + return part + else: + msg = f"Unsupported type {type(part)} for part {part}" + raise ValueError(msg) + + @component.output_types(answers=List[Union[str, Dict[str, str]]]) + def run(self, parts: Variadic[List[Union[str, ByteStream, Part]]]): + converted_parts = [self._convert_part(p) for p in parts] + + contents = [Content(parts=converted_parts, role="user")] + res = self._model.generate_content(contents=contents, **self._kwargs) + self._model.start_chat() + answers = [] + for candidate in res.candidates: + for part in candidate.content.parts: + if part._raw_part.text != "": + answers.append(part.text) + elif part.function_call is not None: + function_call = { + "name": part.function_call.name, + "args": dict(part.function_call.args.items()), + } + answers.append(function_call) + + return {"answers": answers} + + +# generator = GeminiGenerator(project_id="infinite-byte-223810") +# res = generator.run(["What can you do for me?"]) +# res +# another_res = generator.run(["Can you solve this math problems?", "2 + 2", "3 + 3", "1 / 1"]) +# another_res["answers"] +# from pathlib import Path + +# image = ByteStream.from_file_path( +# Path("/Users/silvanocerza/Downloads/photo_2023-11-07_11-45-42.jpg"), mime_type="image/jpeg" +# ) +# res = generator.run(["What is this about?", image]) +# res["answers"] diff --git a/integrations/google-vertex/src/google_vertex_haystack/generators/image_generator.py b/integrations/google-vertex/src/google_vertex_haystack/generators/image_generator.py new file mode 100644 index 000000000..67d270347 --- /dev/null +++ b/integrations/google-vertex/src/google_vertex_haystack/generators/image_generator.py @@ -0,0 +1,55 @@ +import logging +from typing import Any, Dict, List, Optional + +import vertexai +from haystack.core.component import component +from haystack.core.serialization import default_from_dict, default_to_dict +from haystack.dataclasses.byte_stream import ByteStream +from vertexai.preview.vision_models import ImageGenerationModel + +logger = logging.getLogger(__name__) + + +@component +class VertexAIImageGenerator: + def __init__(self, *, model: str = "imagetext", project_id: str, location: Optional[str] = None, **kwargs): + """ + Generates images using a Google Vertex AI model. + + Authenticates using Google Cloud Application Default Credentials (ADCs). + For more information see the official Google documentation: + https://cloud.google.com/docs/authentication/provide-credentials-adc + + :param project_id: ID of the GCP project to use. + :param model: Name of the model to use, defaults to "imagetext". + :param location: The default location to use when making API calls, if not set uses us-central-1. + Defaults to None. + :param kwargs: Additional keyword arguments to pass to the model. 
+ For a list of supported arguments see the `ImageGenerationModel.generate_images()` documentation. + """ + + # Login to GCP. This will fail if user has not set up their gcloud SDK + vertexai.init(project=project_id, location=location) + + self._model_name = model + self._project_id = project_id + self._location = location + self._kwargs = kwargs + + self._model = ImageGenerationModel.from_pretrained(self._model_name) + + def to_dict(self) -> Dict[str, Any]: + return default_to_dict( + self, model=self._model_name, project_id=self._project_id, location=self._location, **self._kwargs + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "VertexAIImageGenerator": + return default_from_dict(cls, data) + + @component.output_types(images=List[ByteStream]) + def run(self, prompt: str, negative_prompt: Optional[str] = None): + negative_prompt = negative_prompt or self._kwargs.get("negative_prompt") + res = self._model.generate_images(prompt=prompt, negative_prompt=negative_prompt, **self._kwargs) + images = [ByteStream(data=i._image_bytes, metadata=i.generation_parameters) for i in res.images] + return {"images": images} diff --git a/integrations/google-vertex/src/google_vertex_haystack/generators/question_answering.py b/integrations/google-vertex/src/google_vertex_haystack/generators/question_answering.py new file mode 100644 index 000000000..276364227 --- /dev/null +++ b/integrations/google-vertex/src/google_vertex_haystack/generators/question_answering.py @@ -0,0 +1,53 @@ +import logging +from typing import Any, Dict, List, Optional + +import vertexai +from haystack.core.component import component +from haystack.core.serialization import default_from_dict, default_to_dict +from haystack.dataclasses.byte_stream import ByteStream +from vertexai.vision_models import Image, ImageTextModel + +logger = logging.getLogger(__name__) + + +@component +class VertexAIImageQA: + def __init__(self, *, model: str = "imagetext", project_id: str, location: Optional[str] = None, **kwargs): + """ + Answers questions about an image using a Google Vertex AI model. + + Authenticates using Google Cloud Application Default Credentials (ADCs). + For more information see the official Google documentation: + https://cloud.google.com/docs/authentication/provide-credentials-adc + + :param project_id: ID of the GCP project to use. + :param model: Name of the model to use, defaults to "imagetext". + :param location: The default location to use when making API calls, if not set uses us-central-1. + Defaults to None. + :param kwargs: Additional keyword arguments to pass to the model. + For a list of supported arguments see the `ImageTextModel.ask_question()` documentation. + """ + + # Login to GCP. 
This will fail if user has not set up their gcloud SDK + vertexai.init(project=project_id, location=location) + + self._model_name = model + self._project_id = project_id + self._location = location + self._kwargs = kwargs + + self._model = ImageTextModel.from_pretrained(self._model_name) + + def to_dict(self) -> Dict[str, Any]: + return default_to_dict( + self, model=self._model_name, project_id=self._project_id, location=self._location, **self._kwargs + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "VertexAIImageQA": + return default_from_dict(cls, data) + + @component.output_types(answers=List[str]) + def run(self, image: ByteStream, question: str): + answers = self._model.ask_question(image=Image(image.data), question=question, **self._kwargs) + return {"answers": answers} diff --git a/integrations/google-vertex/src/google_vertex_haystack/generators/text_generator.py b/integrations/google-vertex/src/google_vertex_haystack/generators/text_generator.py new file mode 100644 index 000000000..6022bcf4f --- /dev/null +++ b/integrations/google-vertex/src/google_vertex_haystack/generators/text_generator.py @@ -0,0 +1,81 @@ +import importlib +import logging +from dataclasses import fields +from typing import Any, Dict, List, Optional + +import vertexai +from haystack.core.component import component +from haystack.core.serialization import default_from_dict, default_to_dict +from vertexai.language_models import TextGenerationModel + +logger = logging.getLogger(__name__) + + +@component +class VertexAITextGenerator: + def __init__(self, *, model: str = "text-bison", project_id: str, location: Optional[str] = None, **kwargs): + """ + Generate text using a Google Vertex AI model. + + Authenticates using Google Cloud Application Default Credentials (ADCs). + For more information see the official Google documentation: + https://cloud.google.com/docs/authentication/provide-credentials-adc + + :param project_id: ID of the GCP project to use. + :param model: Name of the model to use, defaults to "text-bison". + :param location: The default location to use when making API calls, if not set uses us-central-1. + Defaults to None. + :param kwargs: Additional keyword arguments to pass to the model. + For a list of supported arguments see the `TextGenerationModel.predict()` documentation. + """ + + # Login to GCP. 
This will fail if user has not set up their gcloud SDK + vertexai.init(project=project_id, location=location) + + self._model_name = model + self._project_id = project_id + self._location = location + self._kwargs = kwargs + + self._model = TextGenerationModel.from_pretrained(self._model_name) + + def to_dict(self) -> Dict[str, Any]: + data = default_to_dict( + self, model=self._model_name, project_id=self._project_id, location=self._location, **self._kwargs + ) + + if (grounding_source := data["init_parameters"].get("grounding_source")) is not None: + # Handle the grounding source dataclasses + class_type = f"{grounding_source.__module__}.{grounding_source.__class__.__name__}" + init_fields = {f.name: getattr(grounding_source, f.name) for f in fields(grounding_source) if f.init} + data["init_parameters"]["grounding_source"] = { + "type": class_type, + "init_parameters": init_fields, + } + + return data + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "VertexAITextGenerator": + if (grounding_source := data["init_parameters"].get("grounding_source")) is not None: + module_name, class_name = grounding_source["type"].rsplit(".", 1) + module = importlib.import_module(module_name) + data["init_parameters"]["grounding_source"] = getattr(module, class_name)( + **grounding_source["init_parameters"] + ) + return default_from_dict(cls, data) + + @component.output_types(answers=List[str], safety_attributes=Dict[str, float], citations=List[Dict[str, Any]]) + def run(self, prompt: str): + res = self._model.predict(prompt=prompt, **self._kwargs) + + answers = [] + safety_attributes = [] + citations = [] + + for prediction in res.raw_prediction_response.predictions: + answers.append(prediction["content"]) + safety_attributes.append(prediction["safetyAttributes"]) + citations.append(prediction["citationMetadata"]["citations"]) + + return {"answers": answers, "safety_attributes": safety_attributes, "citations": citations} diff --git a/integrations/google-vertex/tests/__init__.py b/integrations/google-vertex/tests/__init__.py new file mode 100644 index 000000000..e873bc332 --- /dev/null +++ b/integrations/google-vertex/tests/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/google-vertex/tests/test_captioner.py b/integrations/google-vertex/tests/test_captioner.py new file mode 100644 index 000000000..bc7e4f829 --- /dev/null +++ b/integrations/google-vertex/tests/test_captioner.py @@ -0,0 +1,76 @@ +from unittest.mock import Mock, patch + +from haystack.dataclasses.byte_stream import ByteStream + +from google_vertex_haystack.generators.captioner import VertexAIImageCaptioner + + +@patch("google_vertex_haystack.generators.captioner.vertexai") +@patch("google_vertex_haystack.generators.captioner.ImageTextModel") +def test_init(mock_model_class, mock_vertexai): + captioner = VertexAIImageCaptioner( + model="imagetext", project_id="myproject-123456", number_of_results=1, language="it" + ) + mock_vertexai.init.assert_called_once_with(project="myproject-123456", location=None) + mock_model_class.from_pretrained.assert_called_once_with("imagetext") + assert captioner._model_name == "imagetext" + assert captioner._project_id == "myproject-123456" + assert captioner._location is None + assert captioner._kwargs == {"number_of_results": 1, "language": "it"} + + +@patch("google_vertex_haystack.generators.captioner.vertexai") +@patch("google_vertex_haystack.generators.captioner.ImageTextModel") +def 
test_to_dict(_mock_model_class, _mock_vertexai): + captioner = VertexAIImageCaptioner( + model="imagetext", project_id="myproject-123456", number_of_results=1, language="it" + ) + assert captioner.to_dict() == { + "type": "google_vertex_haystack.generators.captioner.VertexAIImageCaptioner", + "init_parameters": { + "model": "imagetext", + "project_id": "myproject-123456", + "location": None, + "number_of_results": 1, + "language": "it", + }, + } + + +@patch("google_vertex_haystack.generators.captioner.vertexai") +@patch("google_vertex_haystack.generators.captioner.ImageTextModel") +def test_from_dict(_mock_model_class, _mock_vertexai): + captioner = VertexAIImageCaptioner.from_dict( + { + "type": "google_vertex_haystack.generators.captioner.VertexAIImageCaptioner", + "init_parameters": { + "model": "imagetext", + "project_id": "myproject-123456", + "number_of_results": 1, + "language": "it", + }, + } + ) + assert captioner._model_name == "imagetext" + assert captioner._project_id == "myproject-123456" + assert captioner._location is None + assert captioner._kwargs == {"number_of_results": 1, "language": "it"} + assert captioner._model is not None + + +@patch("google_vertex_haystack.generators.captioner.vertexai") +@patch("google_vertex_haystack.generators.captioner.ImageTextModel") +def test_run_calls_get_captions(mock_model_class, _mock_vertexai): + mock_model = Mock() + mock_model_class.from_pretrained.return_value = mock_model + captioner = VertexAIImageCaptioner( + model="imagetext", project_id="myproject-123456", number_of_results=1, language="it" + ) + + image = ByteStream(data=b"image data") + captioner.run(image=image) + mock_model.get_captions.assert_called_once() + assert len(mock_model.get_captions.call_args.kwargs) == 3 + assert mock_model.get_captions.call_args.kwargs["image"]._image_bytes == image.data + assert mock_model.get_captions.call_args.kwargs["number_of_results"] == 1 + assert mock_model.get_captions.call_args.kwargs["language"] == "it" diff --git a/integrations/google-vertex/tests/test_code_generator.py b/integrations/google-vertex/tests/test_code_generator.py new file mode 100644 index 000000000..c2a2e5aa9 --- /dev/null +++ b/integrations/google-vertex/tests/test_code_generator.py @@ -0,0 +1,79 @@ +from unittest.mock import Mock, patch + +from vertexai.language_models import TextGenerationResponse + +from google_vertex_haystack.generators.code_generator import VertexAICodeGenerator + + +@patch("google_vertex_haystack.generators.code_generator.vertexai") +@patch("google_vertex_haystack.generators.code_generator.CodeGenerationModel") +def test_init(mock_model_class, mock_vertexai): + generator = VertexAICodeGenerator( + model="code-bison", project_id="myproject-123456", candidate_count=3, temperature=0.5 + ) + mock_vertexai.init.assert_called_once_with(project="myproject-123456", location=None) + mock_model_class.from_pretrained.assert_called_once_with("code-bison") + assert generator._model_name == "code-bison" + assert generator._project_id == "myproject-123456" + assert generator._location is None + assert generator._kwargs == {"candidate_count": 3, "temperature": 0.5} + + +@patch("google_vertex_haystack.generators.code_generator.vertexai") +@patch("google_vertex_haystack.generators.code_generator.CodeGenerationModel") +def test_to_dict(_mock_model_class, _mock_vertexai): + generator = VertexAICodeGenerator( + model="code-bison", project_id="myproject-123456", candidate_count=3, temperature=0.5 + ) + assert generator.to_dict() == { + "type": 
"google_vertex_haystack.generators.code_generator.VertexAICodeGenerator", + "init_parameters": { + "model": "code-bison", + "project_id": "myproject-123456", + "location": None, + "candidate_count": 3, + "temperature": 0.5, + }, + } + + +@patch("google_vertex_haystack.generators.code_generator.vertexai") +@patch("google_vertex_haystack.generators.code_generator.CodeGenerationModel") +def test_from_dict(_mock_model_class, _mock_vertexai): + generator = VertexAICodeGenerator.from_dict( + { + "type": "google_vertex_haystack.generators.code_generator.VertexAICodeGenerator", + "init_parameters": { + "model": "code-bison", + "project_id": "myproject-123456", + "candidate_count": 2, + "temperature": 0.5, + }, + } + ) + assert generator._model_name == "code-bison" + assert generator._project_id == "myproject-123456" + assert generator._location is None + assert generator._kwargs == {"candidate_count": 2, "temperature": 0.5} + assert generator._model is not None + + +@patch("google_vertex_haystack.generators.code_generator.vertexai") +@patch("google_vertex_haystack.generators.code_generator.CodeGenerationModel") +def test_run_calls_predict(mock_model_class, _mock_vertexai): + mock_model = Mock() + mock_model.predict.return_value = TextGenerationResponse("answer", None) + mock_model_class.from_pretrained.return_value = mock_model + generator = VertexAICodeGenerator( + model="code-bison", project_id="myproject-123456", candidate_count=1, temperature=0.5 + ) + + prefix = "def print_json(data):\n" + generator.run(prefix=prefix) + + mock_model.predict.assert_called_once() + assert len(mock_model.predict.call_args.kwargs) == 4 + assert mock_model.predict.call_args.kwargs["prefix"] == prefix + assert mock_model.predict.call_args.kwargs["suffix"] is None + assert mock_model.predict.call_args.kwargs["candidate_count"] == 1 + assert mock_model.predict.call_args.kwargs["temperature"] == 0.5 diff --git a/integrations/google-vertex/tests/test_image_generator.py b/integrations/google-vertex/tests/test_image_generator.py new file mode 100644 index 000000000..1c5381a48 --- /dev/null +++ b/integrations/google-vertex/tests/test_image_generator.py @@ -0,0 +1,92 @@ +from unittest.mock import Mock, patch + +from vertexai.preview.vision_models import ImageGenerationResponse + +from google_vertex_haystack.generators.image_generator import VertexAIImageGenerator + + +@patch("google_vertex_haystack.generators.image_generator.vertexai") +@patch("google_vertex_haystack.generators.image_generator.ImageGenerationModel") +def test_init(mock_model_class, mock_vertexai): + generator = VertexAIImageGenerator( + model="imagetext", + project_id="myproject-123456", + guidance_scale=12, + number_of_images=3, + ) + mock_vertexai.init.assert_called_once_with(project="myproject-123456", location=None) + mock_model_class.from_pretrained.assert_called_once_with("imagetext") + assert generator._model_name == "imagetext" + assert generator._project_id == "myproject-123456" + assert generator._location is None + assert generator._kwargs == { + "guidance_scale": 12, + "number_of_images": 3, + } + + +@patch("google_vertex_haystack.generators.image_generator.vertexai") +@patch("google_vertex_haystack.generators.image_generator.ImageGenerationModel") +def test_to_dict(_mock_model_class, _mock_vertexai): + generator = VertexAIImageGenerator( + model="imagetext", + project_id="myproject-123456", + guidance_scale=12, + number_of_images=3, + ) + assert generator.to_dict() == { + "type": 
"google_vertex_haystack.generators.image_generator.VertexAIImageGenerator", + "init_parameters": { + "model": "imagetext", + "project_id": "myproject-123456", + "location": None, + "guidance_scale": 12, + "number_of_images": 3, + }, + } + + +@patch("google_vertex_haystack.generators.image_generator.vertexai") +@patch("google_vertex_haystack.generators.image_generator.ImageGenerationModel") +def test_from_dict(_mock_model_class, _mock_vertexai): + generator = VertexAIImageGenerator.from_dict( + { + "type": "google_vertex_haystack.generators.image_generator.VertexAIImageGenerator", + "init_parameters": { + "model": "imagetext", + "project_id": "myproject-123456", + "location": None, + "guidance_scale": 12, + "number_of_images": 3, + }, + } + ) + assert generator._model_name == "imagetext" + assert generator._project_id == "myproject-123456" + assert generator._location is None + assert generator._kwargs == { + "guidance_scale": 12, + "number_of_images": 3, + } + + +@patch("google_vertex_haystack.generators.image_generator.vertexai") +@patch("google_vertex_haystack.generators.image_generator.ImageGenerationModel") +def test_run_calls_generate_images(mock_model_class, _mock_vertexai): + mock_model = Mock() + mock_model.generate_images.return_value = ImageGenerationResponse(images=[]) + mock_model_class.from_pretrained.return_value = mock_model + generator = VertexAIImageGenerator( + model="imagetext", + project_id="myproject-123456", + guidance_scale=12, + number_of_images=3, + ) + + prompt = "Generate an image of a dog" + negative_prompt = "Generate an image of a cat" + generator.run(prompt=prompt, negative_prompt=negative_prompt) + + mock_model.generate_images.assert_called_once_with( + prompt=prompt, negative_prompt=negative_prompt, guidance_scale=12, number_of_images=3 + ) diff --git a/integrations/google-vertex/tests/test_question_answering.py b/integrations/google-vertex/tests/test_question_answering.py new file mode 100644 index 000000000..3495afcb2 --- /dev/null +++ b/integrations/google-vertex/tests/test_question_answering.py @@ -0,0 +1,83 @@ +from unittest.mock import Mock, patch + +from haystack.dataclasses.byte_stream import ByteStream + +from google_vertex_haystack.generators.question_answering import VertexAIImageQA + + +@patch("google_vertex_haystack.generators.question_answering.vertexai") +@patch("google_vertex_haystack.generators.question_answering.ImageTextModel") +def test_init(mock_model_class, mock_vertexai): + generator = VertexAIImageQA( + model="imagetext", + project_id="myproject-123456", + number_of_results=3, + ) + mock_vertexai.init.assert_called_once_with(project="myproject-123456", location=None) + mock_model_class.from_pretrained.assert_called_once_with("imagetext") + assert generator._model_name == "imagetext" + assert generator._project_id == "myproject-123456" + assert generator._location is None + assert generator._kwargs == {"number_of_results": 3} + + +@patch("google_vertex_haystack.generators.question_answering.vertexai") +@patch("google_vertex_haystack.generators.question_answering.ImageTextModel") +def test_to_dict(_mock_model_class, _mock_vertexai): + generator = VertexAIImageQA( + model="imagetext", + project_id="myproject-123456", + number_of_results=3, + ) + assert generator.to_dict() == { + "type": "google_vertex_haystack.generators.question_answering.VertexAIImageQA", + "init_parameters": { + "model": "imagetext", + "project_id": "myproject-123456", + "location": None, + "number_of_results": 3, + }, + } + + 
+@patch("google_vertex_haystack.generators.question_answering.vertexai") +@patch("google_vertex_haystack.generators.question_answering.ImageTextModel") +def test_from_dict(_mock_model_class, _mock_vertexai): + generator = VertexAIImageQA.from_dict( + { + "type": "google_vertex_haystack.generators.question_answering.VertexAIImageQA", + "init_parameters": { + "model": "imagetext", + "project_id": "myproject-123456", + "location": None, + "number_of_results": 3, + }, + } + ) + assert generator._model_name == "imagetext" + assert generator._project_id == "myproject-123456" + assert generator._location is None + assert generator._kwargs == {"number_of_results": 3} + + +@patch("google_vertex_haystack.generators.question_answering.vertexai") +@patch("google_vertex_haystack.generators.question_answering.ImageTextModel") +def test_run_calls_ask_question(mock_model_class, _mock_vertexai): + mock_model = Mock() + mock_model.ask_question.return_value = [] + mock_model_class.from_pretrained.return_value = mock_model + generator = VertexAIImageQA( + model="imagetext", + project_id="myproject-123456", + number_of_results=3, + ) + + image = ByteStream(data=b"image data") + question = "What is this?" + generator.run(image=image, question=question) + + mock_model.ask_question.assert_called_once() + assert len(mock_model.ask_question.call_args.kwargs) == 3 + assert mock_model.ask_question.call_args.kwargs["image"]._image_bytes == image.data + assert mock_model.ask_question.call_args.kwargs["number_of_results"] == 3 + assert mock_model.ask_question.call_args.kwargs["question"] == question diff --git a/integrations/google-vertex/tests/test_text_generator.py b/integrations/google-vertex/tests/test_text_generator.py new file mode 100644 index 000000000..f2edbfc3b --- /dev/null +++ b/integrations/google-vertex/tests/test_text_generator.py @@ -0,0 +1,96 @@ +from unittest.mock import MagicMock, Mock, patch + +from vertexai.language_models import GroundingSource + +from google_vertex_haystack.generators.text_generator import VertexAITextGenerator + + +@patch("google_vertex_haystack.generators.text_generator.vertexai") +@patch("google_vertex_haystack.generators.text_generator.TextGenerationModel") +def test_init(mock_model_class, mock_vertexai): + grounding_source = GroundingSource.VertexAISearch("1234", "us-central-1") + generator = VertexAITextGenerator( + model="text-bison", project_id="myproject-123456", temperature=0.2, grounding_source=grounding_source + ) + mock_vertexai.init.assert_called_once_with(project="myproject-123456", location=None) + mock_model_class.from_pretrained.assert_called_once_with("text-bison") + assert generator._model_name == "text-bison" + assert generator._project_id == "myproject-123456" + assert generator._location is None + assert generator._kwargs == {"temperature": 0.2, "grounding_source": grounding_source} + + +@patch("google_vertex_haystack.generators.text_generator.vertexai") +@patch("google_vertex_haystack.generators.text_generator.TextGenerationModel") +def test_to_dict(_mock_model_class, _mock_vertexai): + grounding_source = GroundingSource.VertexAISearch("1234", "us-central-1") + generator = VertexAITextGenerator( + model="text-bison", project_id="myproject-123456", temperature=0.2, grounding_source=grounding_source + ) + assert generator.to_dict() == { + "type": "google_vertex_haystack.generators.text_generator.VertexAITextGenerator", + "init_parameters": { + "model": "text-bison", + "project_id": "myproject-123456", + "location": None, + "temperature": 0.2, + 
"grounding_source": { + "type": "vertexai.language_models._language_models.VertexAISearch", + "init_parameters": { + "location": "us-central-1", + "data_store_id": "1234", + "project": None, + "disable_attribution": False, + }, + }, + }, + } + + +@patch("google_vertex_haystack.generators.text_generator.vertexai") +@patch("google_vertex_haystack.generators.text_generator.TextGenerationModel") +def test_from_dict(_mock_model_class, _mock_vertexai): + generator = VertexAITextGenerator.from_dict( + { + "type": "google_vertex_haystack.generators.text_generator.VertexAITextGenerator", + "init_parameters": { + "model": "text-bison", + "project_id": "myproject-123456", + "location": None, + "temperature": 0.2, + "grounding_source": { + "type": "vertexai.language_models._language_models.VertexAISearch", + "init_parameters": { + "location": "us-central-1", + "data_store_id": "1234", + "project": None, + "disable_attribution": False, + }, + }, + }, + } + ) + assert generator._model_name == "text-bison" + assert generator._project_id == "myproject-123456" + assert generator._location is None + assert generator._kwargs == { + "temperature": 0.2, + "grounding_source": GroundingSource.VertexAISearch("1234", "us-central-1"), + } + + +@patch("google_vertex_haystack.generators.text_generator.vertexai") +@patch("google_vertex_haystack.generators.text_generator.TextGenerationModel") +def test_run_calls_get_captions(mock_model_class, _mock_vertexai): + mock_model = Mock() + mock_model.predict.return_value = MagicMock() + mock_model_class.from_pretrained.return_value = mock_model + grounding_source = GroundingSource.VertexAISearch("1234", "us-central-1") + generator = VertexAITextGenerator( + model="text-bison", project_id="myproject-123456", temperature=0.2, grounding_source=grounding_source + ) + + prompt = "What is the answer?" + generator.run(prompt=prompt) + + mock_model.predict.assert_called_once_with(prompt=prompt, temperature=0.2, grounding_source=grounding_source)