From 66bede7bd601c1e6dcd071ba0cae8ef9f4275974 Mon Sep 17 00:00:00 2001 From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Date: Thu, 8 Feb 2024 10:23:31 +0100 Subject: [PATCH 01/12] Change docstring labeler workflow to add Daria as reviewer (#357) --- .github/workflows/CI_docstring_labeler.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/CI_docstring_labeler.yml b/.github/workflows/CI_docstring_labeler.yml index 151bdb321..5410b6f22 100644 --- a/.github/workflows/CI_docstring_labeler.yml +++ b/.github/workflows/CI_docstring_labeler.yml @@ -54,3 +54,9 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: gh pr edit ${{ github.event.pull_request.html_url }} --add-label "type:documentation" + + - name: Add reviewer + if: ${{ steps.run-check.outputs.should_run == 'true' }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: gh pr edit ${{ github.event.pull_request.html_url }} --add-reviewer dfokina From c0329345da7bfdb025113bf551f20930a6b0b53e Mon Sep 17 00:00:00 2001 From: Daria Fokina Date: Thu, 8 Feb 2024 10:27:11 +0100 Subject: [PATCH 02/12] llama.cpp: generate api docs (#353) --- .github/workflows/llama_cpp.yml | 4 ++++ integrations/llama_cpp/pydoc/config.yml | 28 +++++++++++++++++++++++++ integrations/llama_cpp/pyproject.toml | 5 ++++- 3 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 integrations/llama_cpp/pydoc/config.yml diff --git a/.github/workflows/llama_cpp.yml b/.github/workflows/llama_cpp.yml index 700e50b8e..89c7e5426 100644 --- a/.github/workflows/llama_cpp.yml +++ b/.github/workflows/llama_cpp.yml @@ -52,5 +52,9 @@ jobs: if: matrix.python-version == '3.9' && runner.os == 'Linux' run: hatch run lint:all + - name: Generate docs + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + - name: Run tests run: hatch run cov diff --git a/integrations/llama_cpp/pydoc/config.yml b/integrations/llama_cpp/pydoc/config.yml new file mode 100644 index 000000000..4c511bae0 --- /dev/null +++ b/integrations/llama_cpp/pydoc/config.yml @@ -0,0 +1,28 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: [ + "haystack_integrations.components.generators.llama_cpp.generator", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: Llama.cpp integration for Haystack + category_slug: haystack-integrations + title: Llama.cpp + slug: integrations-llama-cpp + order: 110 + markdown: + descriptive_class_title: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_llama_cpp.md \ No newline at end of file diff --git a/integrations/llama_cpp/pyproject.toml b/integrations/llama_cpp/pyproject.toml index 80138bc7b..1b165dcaf 100644 --- a/integrations/llama_cpp/pyproject.toml +++ b/integrations/llama_cpp/pyproject.toml @@ -50,6 +50,7 @@ git_describe_command = 'git describe --tags --match="integrations/llama_cpp-v[0- dependencies = [ "coverage[toml]>=6.5", "pytest", + "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] test = "pytest {args:tests}" @@ -62,7 +63,9 @@ cov = [ "test-cov", "cov-report", ] - +docs = [ + "pydoc-markdown pydoc/config.yml" +] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11", "3.12"] From 
c64763d2d53e92c7f175eea3ff6caef5b7dce65f Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Thu, 8 Feb 2024 10:49:40 +0100 Subject: [PATCH 03/12] feat: Generate unstructured API docs (#350) * Generate unstructured API docs * add working-directory * update module import path * upd path * centralize working directory * revert * Update unstructured.yml * Update unstructured.yml --------- Co-authored-by: Daria Fokina Co-authored-by: Stefano Fiorucci --- .github/workflows/unstructured.yml | 9 +++++-- integrations/unstructured/pydoc/config.yml | 28 ++++++++++++++++++++++ integrations/unstructured/pyproject.toml | 4 ++++ 3 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 integrations/unstructured/pydoc/config.yml diff --git a/.github/workflows/unstructured.yml b/.github/workflows/unstructured.yml index 77ebb10ca..d12bf4daf 100644 --- a/.github/workflows/unstructured.yml +++ b/.github/workflows/unstructured.yml @@ -8,7 +8,7 @@ on: pull_request: paths: - "integrations/unstructured/**" - - ".github/workflows/unstructured.yml" + - ".github/workflows/unstructured.yml" concurrency: group: unstructured-${{ github.head_ref }} @@ -59,6 +59,11 @@ jobs: if: matrix.python-version == '3.9' run: hatch run lint:all + - name: Generate docs + working-directory: integrations/unstructured + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + - name: Run tests - working-directory: integrations/unstructured + working-directory: integrations/unstructured run: hatch run cov diff --git a/integrations/unstructured/pydoc/config.yml b/integrations/unstructured/pydoc/config.yml new file mode 100644 index 000000000..77c17fac6 --- /dev/null +++ b/integrations/unstructured/pydoc/config.yml @@ -0,0 +1,28 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: [ + "haystack_integrations.components.converters.unstructured.converter", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: Unstructured integration for Haystack + category_slug: haystack-integrations + title: Unstructured + slug: integrations-unstructured + order: 170 + markdown: + descriptive_class_title: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_unstructured.md diff --git a/integrations/unstructured/pyproject.toml b/integrations/unstructured/pyproject.toml index 9cc2a0c6a..7366a8adf 100644 --- a/integrations/unstructured/pyproject.toml +++ b/integrations/unstructured/pyproject.toml @@ -49,6 +49,7 @@ dependencies = [ "coverage[toml]>=6.5", "pytest", "pytest-xdist", + "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] test = "pytest {args:tests}" @@ -61,6 +62,9 @@ cov = [ "test-cov", "cov-report", ] +docs = [ + "pydoc-markdown pydoc/config.yml" +] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11"] From 5096aea315d16e2443af94b2ae636189052cd26b Mon Sep 17 00:00:00 2001 From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Date: Thu, 8 Feb 2024 12:52:43 +0100 Subject: [PATCH 04/12] Revert "Change docstring labeler workflow to add Daria as reviewer (#357)" (#360) This reverts commit 66bede7bd601c1e6dcd071ba0cae8ef9f4275974. 
--- .github/workflows/CI_docstring_labeler.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/CI_docstring_labeler.yml b/.github/workflows/CI_docstring_labeler.yml index 5410b6f22..151bdb321 100644 --- a/.github/workflows/CI_docstring_labeler.yml +++ b/.github/workflows/CI_docstring_labeler.yml @@ -54,9 +54,3 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: gh pr edit ${{ github.event.pull_request.html_url }} --add-label "type:documentation" - - - name: Add reviewer - if: ${{ steps.run-check.outputs.should_run == 'true' }} - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: gh pr edit ${{ github.event.pull_request.html_url }} --add-reviewer dfokina From dac86f15d54a7327b59a97846db2225db8fe3d48 Mon Sep 17 00:00:00 2001 From: Madeesh Kannan Date: Thu, 8 Feb 2024 13:16:05 +0100 Subject: [PATCH 05/12] ci: Generate API docs for Pinecone (#359) * ci: Generate API docs for Pinecone * Add working directory --- .github/workflows/pinecone.yml | 8 +++ integrations/pinecone/pydoc/config.yml | 32 +++++++++++ integrations/pinecone/pyproject.toml | 75 ++++++++------------------ 3 files changed, 63 insertions(+), 52 deletions(-) create mode 100644 integrations/pinecone/pydoc/config.yml diff --git a/.github/workflows/pinecone.yml b/.github/workflows/pinecone.yml index fe1b1d456..a82fb74de 100644 --- a/.github/workflows/pinecone.yml +++ b/.github/workflows/pinecone.yml @@ -10,6 +10,10 @@ on: - "integrations/pinecone/**" - ".github/workflows/pinecone.yml" +defaults: + run: + working-directory: integrations/pinecone + concurrency: group: pinecone-${{ github.head_ref }} cancel-in-progress: true @@ -46,6 +50,10 @@ jobs: if: matrix.python-version == '3.9' run: hatch run lint:all + - name: Generate docs + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + - name: Run tests working-directory: integrations/pinecone run: hatch run cov diff --git a/integrations/pinecone/pydoc/config.yml b/integrations/pinecone/pydoc/config.yml new file mode 100644 index 000000000..f2d6b338b --- /dev/null +++ b/integrations/pinecone/pydoc/config.yml @@ -0,0 +1,32 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: + [ + "haystack_integrations.components.retrievers.pinecone.dense_retriever", + "haystack_integrations.document_stores.pinecone.document_store", + "haystack_integrations.document_stores.pinecone.errors", + "haystack_integrations.document_stores.pinecone.filters", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: Pinecone integration for Haystack + category_slug: haystack-integrations + title: Pinecone + slug: integrations-pinecone + order: 150 + markdown: + descriptive_class_title: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_pinecone.md diff --git a/integrations/pinecone/pyproject.toml b/integrations/pinecone/pyproject.toml index c95ee0aac..f7dbe8df9 100644 --- a/integrations/pinecone/pyproject.toml +++ b/integrations/pinecone/pyproject.toml @@ -10,9 +10,7 @@ readme = "README.md" requires-python = ">=3.8" license = "Apache-2.0" keywords = [] -authors = [ - { name = "deepset GmbH", email = "info@deepset.ai" }, -] +authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }] 
classifiers = [ "Development Status :: 4 - Beta", "Programming Language :: Python", @@ -25,8 +23,8 @@ classifiers = [ ] dependencies = [ "haystack-ai", - "pinecone-client<3", # our implementation is not compatible with pinecone-client>=3 - # see https://github.com/deepset-ai/haystack-core-integrations/issues/223 + "pinecone-client<3", # our implementation is not compatible with pinecone-client>=3 + # see https://github.com/deepset-ai/haystack-core-integrations/issues/223 ] [project.urls] @@ -50,47 +48,28 @@ dependencies = [ "coverage[toml]>=6.5", "pytest", "pytest-xdist", + "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] # Pinecone tests are slow (require HTTP requests), so we run them in parallel # with pytest-xdist (https://pytest-xdist.readthedocs.io/en/stable/distribution.html) test = "pytest -n auto --maxprocesses=2 {args:tests}" test-cov = "coverage run -m pytest -n auto --maxprocesses=2 {args:tests}" -cov-report = [ - "- coverage combine", - "coverage report", -] -cov = [ - "test-cov", - "cov-report", -] +cov-report = ["- coverage combine", "coverage report"] +cov = ["test-cov", "cov-report"] +docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11"] [tool.hatch.envs.lint] detached = true -dependencies = [ - "black>=23.1.0", - "mypy>=1.0.0", - "ruff>=0.0.243", - "numpy", -] +dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243", "numpy"] [tool.hatch.envs.lint.scripts] typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}" -style = [ - "ruff {args:.}", - "black --check --diff {args:.}", -] -fmt = [ - "black {args:.}", - "ruff --fix {args:.}", - "style", -] -all = [ - "style", - "typing", -] +style = ["ruff {args:.}", "black --check --diff {args:.}"] +fmt = ["black {args:.}", "ruff --fix {args:.}", "style"] +all = ["style", "typing"] [tool.hatch.metadata] allow-direct-references = true @@ -136,9 +115,15 @@ ignore = [ # Allow boolean positional values in function calls, like `dict.get(... 
True)` "FBT003", # Ignore checks for possible passwords - "S105", "S106", "S107", + "S105", + "S106", + "S107", # Ignore complexity - "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915", + "C901", + "PLR0911", + "PLR0912", + "PLR0913", + "PLR0915", ] unfixable = [ # Don't touch unused imports @@ -159,33 +144,19 @@ ban-relative-imports = "parents" source_pkgs = ["src", "tests"] branch = true parallel = true -omit = [ - "examples" -] +omit = ["examples"] [tool.coverage.paths] pinecone_haystack = ["src/*"] tests = ["tests"] [tool.coverage.report] -exclude_lines = [ - "no cov", - "if __name__ == .__main__.:", - "if TYPE_CHECKING:", -] +exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] [tool.pytest.ini_options] minversion = "6.0" -markers = [ - "unit: unit tests", - "integration: integration tests" -] +markers = ["unit: unit tests", "integration: integration tests"] [[tool.mypy.overrides]] -module = [ - "pinecone.*", - "haystack.*", - "haystack_integrations.*", - "pytest.*" -] +module = ["pinecone.*", "haystack.*", "haystack_integrations.*", "pytest.*"] ignore_missing_imports = true From ee5f3c0f1917add38f5f479550116173b23a82f2 Mon Sep 17 00:00:00 2001 From: Daria Fokina Date: Thu, 8 Feb 2024 13:51:37 +0100 Subject: [PATCH 06/12] amazon bedrock: generate api docs (#326) * amazon bedrock: generate api docs * path upd * add dependency * Update amazon_bedrock.yml * add files --- .github/workflows/amazon_bedrock.yml | 4 +++ integrations/amazon_bedrock/pydoc/config.yml | 31 ++++++++++++++++++++ integrations/amazon_bedrock/pyproject.toml | 5 +++- 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 integrations/amazon_bedrock/pydoc/config.yml diff --git a/.github/workflows/amazon_bedrock.yml b/.github/workflows/amazon_bedrock.yml index cae0ddb1b..75f881a50 100644 --- a/.github/workflows/amazon_bedrock.yml +++ b/.github/workflows/amazon_bedrock.yml @@ -52,5 +52,9 @@ jobs: if: matrix.python-version == '3.9' && runner.os == 'Linux' run: hatch run lint:all + - name: Generate docs + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + - name: Run tests run: hatch run cov diff --git a/integrations/amazon_bedrock/pydoc/config.yml b/integrations/amazon_bedrock/pydoc/config.yml new file mode 100644 index 000000000..1f917a0ec --- /dev/null +++ b/integrations/amazon_bedrock/pydoc/config.yml @@ -0,0 +1,31 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: [ + "haystack_integrations.components.generators.amazon_bedrock.generator", + "haystack_integrations.components.generators.amazon_bedrock.adapters", + "haystack_integrations.components.generators.amazon_bedrock.errors", + "haystack_integrations.components.generators.amazon_bedrock.handlers", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: Amazon Bedrock integration for Haystack + category_slug: haystack-integrations + title: Amazon Bedrock + slug: integrations-amazon-bedrock + order: 10 + markdown: + descriptive_class_title: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_amazon_bedrock.md diff --git a/integrations/amazon_bedrock/pyproject.toml b/integrations/amazon_bedrock/pyproject.toml index 6a2ce3eab..8527d27a1 100644 --- 
a/integrations/amazon_bedrock/pyproject.toml +++ b/integrations/amazon_bedrock/pyproject.toml @@ -50,6 +50,7 @@ git_describe_command = 'git describe --tags --match="integrations/amazon_bedrock dependencies = [ "coverage[toml]>=6.5", "pytest", + "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] test = "pytest {args:tests}" @@ -62,7 +63,9 @@ cov = [ "test-cov", "cov-report", ] - +docs = [ + "pydoc-markdown pydoc/config.yml" +] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11", "3.12"] From dd92c1b3be3b6b3ae7b7007ae129e35a84e7011f Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Thu, 8 Feb 2024 14:18:00 +0100 Subject: [PATCH 07/12] feat: Add Amazon Bedrock chat model support (#333) * Add AmazonBedrockChatGenerator, add Anthropic Claude support * Add Meta Llama 2 chat model support --- .../generators/amazon_bedrock/__init__.py | 3 +- .../amazon_bedrock/chat/__init__.py | 3 + .../amazon_bedrock/chat/adapters.py | 266 ++++++++++++++++++ .../amazon_bedrock/chat/chat_generator.py | 249 ++++++++++++++++ .../generators/amazon_bedrock/generator.py | 2 +- .../generators/amazon_bedrock/handlers.py | 18 +- .../tests/test_amazon_chat_bedrock.py | 250 ++++++++++++++++ 7 files changed, 786 insertions(+), 5 deletions(-) create mode 100644 integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/__init__.py create mode 100644 integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/adapters.py create mode 100644 integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py create mode 100644 integrations/amazon_bedrock/tests/test_amazon_chat_bedrock.py diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/__init__.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/__init__.py index 236347b61..2d33beb42 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/__init__.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/__init__.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +from .chat.chat_generator import AmazonBedrockChatGenerator from .generator import AmazonBedrockGenerator -__all__ = ["AmazonBedrockGenerator"] +__all__ = ["AmazonBedrockGenerator", "AmazonBedrockChatGenerator"] diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/__init__.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/__init__.py new file mode 100644 index 000000000..e873bc332 --- /dev/null +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/adapters.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/adapters.py new file mode 100644 index 000000000..a4eefe321 --- /dev/null +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/adapters.py @@ -0,0 +1,266 @@ +import json +import logging +from abc import ABC, abstractmethod +from typing import 
Any, Callable, Dict, List + +from botocore.eventstream import EventStream +from haystack.dataclasses import ChatMessage, ChatRole, StreamingChunk +from transformers import AutoTokenizer, PreTrainedTokenizer + +from haystack_integrations.components.generators.amazon_bedrock.handlers import DefaultPromptHandler + +logger = logging.getLogger(__name__) + + +class BedrockModelChatAdapter(ABC): + """ + Base class for Amazon Bedrock chat model adapters. + """ + + def __init__(self, generation_kwargs: Dict[str, Any]) -> None: + self.generation_kwargs = generation_kwargs + + @abstractmethod + def prepare_body(self, messages: List[ChatMessage], **inference_kwargs) -> Dict[str, Any]: + """Prepares the body for the Amazon Bedrock request.""" + + def get_responses(self, response_body: Dict[str, Any]) -> List[ChatMessage]: + """Extracts the responses from the Amazon Bedrock response.""" + return self._extract_messages_from_response(self.response_body_message_key(), response_body) + + def get_stream_responses(self, stream: EventStream, stream_handler: Callable[[StreamingChunk], None]) -> List[str]: + tokens: List[str] = [] + for event in stream: + chunk = event.get("chunk") + if chunk: + decoded_chunk = json.loads(chunk["bytes"].decode("utf-8")) + token = self._extract_token_from_stream(decoded_chunk) + # take all the rest key/value pairs from the chunk, add them to the metadata + stream_metadata = {k: v for (k, v) in decoded_chunk.items() if v != token} + stream_chunk = StreamingChunk(content=token, meta=stream_metadata) + # callback the stream handler with StreamingChunk + stream_handler(stream_chunk) + tokens.append(token) + responses = ["".join(tokens).lstrip()] + return responses + + @staticmethod + def _update_params(target_dict: Dict[str, Any], updates_dict: Dict[str, Any]) -> None: + """ + Updates target_dict with values from updates_dict. Merges lists instead of overriding them. + + :param target_dict: The dictionary to update. + :param updates_dict: The dictionary with updates. + """ + for key, value in updates_dict.items(): + if key in target_dict and isinstance(target_dict[key], list) and isinstance(value, list): + # Merge lists and remove duplicates + target_dict[key] = sorted(set(target_dict[key] + value)) + else: + # Override the value in target_dict + target_dict[key] = value + + def _get_params(self, inference_kwargs: Dict[str, Any], default_params: Dict[str, Any]) -> Dict[str, Any]: + """ + Merges params from inference_kwargs with the default params and self.generation_kwargs. + Uses a helper function to merge lists or override values as necessary. + + :param inference_kwargs: The inference kwargs to merge. + :param default_params: The default params to start with. + :return: The merged params. + """ + # Start with a copy of default_params + kwargs = default_params.copy() + + # Update the default params with self.generation_kwargs and finally inference_kwargs + self._update_params(kwargs, self.generation_kwargs) + self._update_params(kwargs, inference_kwargs) + + return kwargs + + def _ensure_token_limit(self, prompt: str) -> str: + resize_info = self.check_prompt(prompt) + if resize_info["prompt_length"] != resize_info["new_prompt_length"]: + logger.warning( + "The prompt was truncated from %s tokens to %s tokens so that the prompt length and " + "the answer length (%s tokens) fit within the model's max token limit (%s tokens). 
" + "Shorten the prompt or it will be cut off.", + resize_info["prompt_length"], + max(0, resize_info["model_max_length"] - resize_info["max_length"]), # type: ignore + resize_info["max_length"], + resize_info["model_max_length"], + ) + return str(resize_info["resized_prompt"]) + + @abstractmethod + def check_prompt(self, prompt: str) -> Dict[str, Any]: + """ + Checks the prompt length and resizes it if necessary. + + :param prompt: The prompt to check. + :return: A dictionary containing the resized prompt and additional information. + """ + + def _extract_messages_from_response(self, message_tag: str, response_body: Dict[str, Any]) -> List[ChatMessage]: + metadata = {k: v for (k, v) in response_body.items() if k != message_tag} + return [ChatMessage.from_assistant(response_body[message_tag], meta=metadata)] + + @abstractmethod + def response_body_message_key(self) -> str: + """Returns the key for the message in the response body.""" + + @abstractmethod + def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + """Extracts the token from a streaming chunk.""" + + +class AnthropicClaudeChatAdapter(BedrockModelChatAdapter): + """ + Model adapter for the Anthropic Claude model. + """ + + ANTHROPIC_USER_TOKEN = "\n\nHuman:" + ANTHROPIC_ASSISTANT_TOKEN = "\n\nAssistant:" + + def __init__(self, generation_kwargs: Dict[str, Any]): + super().__init__(generation_kwargs) + + # We pop the model_max_length as it is not sent to the model + # but used to truncate the prompt if needed + # Anthropic Claude has a limit of at least 100000 tokens + # https://docs.anthropic.com/claude/reference/input-and-output-sizes + model_max_length = self.generation_kwargs.pop("model_max_length", 100000) + + # Truncate prompt if prompt tokens > model_max_length-max_length + # (max_length is the length of the generated text) + # TODO use Anthropic tokenizer to get the precise prompt length + # See https://github.com/anthropics/anthropic-sdk-python?tab=readme-ov-file#token-counting + self.prompt_handler = DefaultPromptHandler( + tokenizer="gpt2", + model_max_length=model_max_length, + max_length=self.generation_kwargs.get("max_tokens_to_sample") or 512, + ) + + def prepare_body(self, messages: List[ChatMessage], **inference_kwargs) -> Dict[str, Any]: + default_params = { + "max_tokens_to_sample": self.generation_kwargs.get("max_tokens_to_sample") or 512, + "stop_sequences": ["\n\nHuman:"], + } + + # combine stop words with default stop sequences, remove stop_words as Anthropic does not support it + stop_sequences = inference_kwargs.get("stop_sequences", []) + inference_kwargs.pop("stop_words", []) + if stop_sequences: + inference_kwargs["stop_sequences"] = stop_sequences + params = self._get_params(inference_kwargs, default_params) + body = {"prompt": self.prepare_chat_messages(messages=messages), **params} + return body + + def prepare_chat_messages(self, messages: List[ChatMessage]) -> str: + conversation = [] + for index, message in enumerate(messages): + if message.is_from(ChatRole.USER): + conversation.append(f"{AnthropicClaudeChatAdapter.ANTHROPIC_USER_TOKEN} {message.content.strip()}") + elif message.is_from(ChatRole.ASSISTANT): + conversation.append(f"{AnthropicClaudeChatAdapter.ANTHROPIC_ASSISTANT_TOKEN} {message.content.strip()}") + elif message.is_from(ChatRole.FUNCTION): + error_message = "Anthropic does not support function calls." 
+ raise ValueError(error_message) + elif message.is_from(ChatRole.SYSTEM) and index == 0: + # Until we transition to the new chat message format system messages will be ignored + # see https://docs.anthropic.com/claude/reference/messages_post for more details + logger.warning( + "System messages are not fully supported by the current version of Claude and will be ignored." + ) + else: + invalid_role = f"Invalid role {message.role} for message {message.content}" + raise ValueError(invalid_role) + + prepared_prompt = "".join(conversation) + AnthropicClaudeChatAdapter.ANTHROPIC_ASSISTANT_TOKEN + " " + return self._ensure_token_limit(prepared_prompt) + + def check_prompt(self, prompt: str) -> Dict[str, Any]: + return self.prompt_handler(prompt) + + def response_body_message_key(self) -> str: + return "completion" + + def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + return chunk.get("completion", "") + + +class MetaLlama2ChatAdapter(BedrockModelChatAdapter): + """ + Model adapter for the Meta Llama 2 models. + """ + + chat_template = ( + "{% if messages[0]['role'] == 'system' %}" + "{% set loop_messages = messages[1:] %}" + "{% set system_message = messages[0]['content'] %}" + "{% else %}" + "{% set loop_messages = messages %}" + "{% set system_message = false %}" + "{% endif %}" + "{% for message in loop_messages %}" + "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}" + "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}" + "{% endif %}" + "{% if loop.index0 == 0 and system_message != false %}" + "{% set content = '<>\n' + system_message + '\n<>\n\n' + message['content'] %}" + "{% else %}" + "{% set content = message['content'] %}" + "{% endif %}" + "{% if message['role'] == 'user' %}" + "{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}" + "{% elif message['role'] == 'assistant' %}" + "{{ ' ' + content.strip() + ' ' + eos_token }}" + "{% endif %}" + "{% endfor %}" + ) + + def __init__(self, generation_kwargs: Dict[str, Any]) -> None: + super().__init__(generation_kwargs) + # We pop the model_max_length as it is not sent to the model + # but used to truncate the prompt if needed + # Llama 2 has context window size of 4096 tokens + # with some exceptions when the context window has been extended + model_max_length = self.generation_kwargs.pop("model_max_length", 4096) + + # Use `google/flan-t5-base` as it's also BPE sentencepiece tokenizer just like llama 2 + # a) we should get good estimates for the prompt length (empirically close to llama 2) + # b) we can use apply_chat_template with the template above to delineate ChatMessages + tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base") + tokenizer.bos_token = "" + tokenizer.eos_token = "" + tokenizer.unk_token = "" + self.prompt_handler = DefaultPromptHandler( + tokenizer=tokenizer, + model_max_length=model_max_length, + max_length=self.generation_kwargs.get("max_gen_len") or 512, + ) + + def prepare_body(self, messages: List[ChatMessage], **inference_kwargs) -> Dict[str, Any]: + default_params = {"max_gen_len": self.generation_kwargs.get("max_gen_len") or 512} + + # combine stop words with default stop sequences, remove stop_words as MetaLlama2 does not support it + stop_sequences = inference_kwargs.get("stop_sequences", []) + inference_kwargs.pop("stop_words", []) + if stop_sequences: + inference_kwargs["stop_sequences"] = stop_sequences + params = self._get_params(inference_kwargs, default_params) + body = {"prompt": 
self.prepare_chat_messages(messages=messages), **params} + return body + + def prepare_chat_messages(self, messages: List[ChatMessage]) -> str: + prepared_prompt: str = self.prompt_handler.tokenizer.apply_chat_template( + conversation=messages, tokenize=False, chat_template=self.chat_template + ) + return self._ensure_token_limit(prepared_prompt) + + def check_prompt(self, prompt: str) -> Dict[str, Any]: + return self.prompt_handler(prompt) + + def response_body_message_key(self) -> str: + return "generation" + + def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + return chunk.get("generation", "") diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py new file mode 100644 index 000000000..804d44413 --- /dev/null +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py @@ -0,0 +1,249 @@ +import json +import logging +import re +from typing import Any, Callable, ClassVar, Dict, List, Optional, Type + +import boto3 +from botocore.exceptions import BotoCoreError, ClientError +from haystack import component, default_from_dict, default_to_dict +from haystack.components.generators.utils import deserialize_callback_handler +from haystack.dataclasses import ChatMessage, StreamingChunk + +from haystack_integrations.components.generators.amazon_bedrock.errors import ( + AmazonBedrockConfigurationError, + AmazonBedrockInferenceError, + AWSConfigurationError, +) + +from .adapters import AnthropicClaudeChatAdapter, BedrockModelChatAdapter, MetaLlama2ChatAdapter + +logger = logging.getLogger(__name__) + +AWS_CONFIGURATION_KEYS = [ + "aws_access_key_id", + "aws_secret_access_key", + "aws_session_token", + "aws_region_name", + "aws_profile_name", +] + + +@component +class AmazonBedrockChatGenerator: + """ + AmazonBedrockChatGenerator enables text generation via Amazon Bedrock chat hosted models. For example, to use + the Anthropic Claude model, simply initialize the AmazonBedrockChatGenerator with the 'anthropic.claude-v2' + model name. + + ```python + from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockChatGenerator + from haystack.dataclasses import ChatMessage + from haystack.components.generators.utils import print_streaming_chunk + + messages = [ChatMessage.from_system("\\nYou are a helpful, respectful and honest assistant"), + ChatMessage.from_user("What's Natural Language Processing?")] + + + client = AmazonBedrockChatGenerator(model="anthropic.claude-v2", streaming_callback=print_streaming_chunk) + client.run(messages, generation_kwargs={"max_tokens_to_sample": 512}) + + ``` + + If you prefer non-streaming mode, simply remove the `streaming_callback` parameter, capture the return value of the + component's run method and the AmazonBedrockChatGenerator will return the response in a non-streaming mode. 
+ """ + + SUPPORTED_MODEL_PATTERNS: ClassVar[Dict[str, Type[BedrockModelChatAdapter]]] = { + r"anthropic.claude.*": AnthropicClaudeChatAdapter, + r"meta.llama2.*": MetaLlama2ChatAdapter, + } + + def __init__( + self, + model: str, + aws_access_key_id: Optional[str] = None, + aws_secret_access_key: Optional[str] = None, + aws_session_token: Optional[str] = None, + aws_region_name: Optional[str] = None, + aws_profile_name: Optional[str] = None, + generation_kwargs: Optional[Dict[str, Any]] = None, + stop_words: Optional[List[str]] = None, + streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, + ): + """ + Initializes the AmazonBedrockChatGenerator with the provided parameters. The parameters are passed to the + Amazon Bedrock client. + + Note that the AWS credentials are not required if the AWS environment is configured correctly. These are loaded + automatically from the environment or the AWS configuration file and do not need to be provided explicitly via + the constructor. If the AWS environment is not configured users need to provide the AWS credentials via the + constructor. Aside from model, three required parameters are `aws_access_key_id`, `aws_secret_access_key`, + and `aws_region_name`. + + :param model: The model to use for generation. The model must be available in Amazon Bedrock. The model has to + be specified in the format outlined in the Amazon Bedrock [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids-arns.html). + :param aws_access_key_id: AWS access key ID. + :param aws_secret_access_key: AWS secret access key. + :param aws_session_token: AWS session token. + :param aws_region_name: AWS region name. + :param aws_profile_name: AWS profile name. + :param generation_kwargs: Additional generation keyword arguments passed to the model. The defined keyword + parameters are specific to a specific model and can be found in the model's documentation. For example, the + Anthropic Claude generation parameters can be found [here](https://docs.anthropic.com/claude/reference/complete_post). + :param stop_words: A list of stop words that stop model generation when encountered. They can be provided via + this parameter or via models generation_kwargs under a model's specific key for stop words. For example, the + Anthropic Claude stop words are provided via the `stop_sequences` key. + :param streaming_callback: A callback function that is called when a new chunk is received from the stream. + By default, the model is not set up for streaming. To enable streaming simply set this parameter to a callback + function that will handle the streaming chunks. The callback function will receive a StreamingChunk object and + switch the streaming mode on. + """ + if not model: + msg = "'model' cannot be None or empty string" + raise ValueError(msg) + self.model = model + + # get the model adapter for the given model + model_adapter_cls = self.get_model_adapter(model=model) + if not model_adapter_cls: + msg = f"AmazonBedrockGenerator doesn't support the model {model}." 
+ raise AmazonBedrockConfigurationError(msg) + self.model_adapter = model_adapter_cls(generation_kwargs or {}) + + # create the AWS session and client + try: + session = self.get_aws_session( + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_session_token=aws_session_token, + aws_region_name=aws_region_name, + aws_profile_name=aws_profile_name, + ) + self.client = session.client("bedrock-runtime") + except Exception as exception: + msg = ( + "Could not connect to Amazon Bedrock. Make sure the AWS environment is configured correctly. " + "See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration" + ) + raise AmazonBedrockConfigurationError(msg) from exception + + self.stop_words = stop_words or [] + self.streaming_callback = streaming_callback + + def invoke(self, *args, **kwargs): + kwargs = kwargs.copy() + messages: List[ChatMessage] = kwargs.pop("messages", []) + # check if the prompt is a list of ChatMessage objects + if not ( + isinstance(messages, list) + and len(messages) > 0 + and all(isinstance(message, ChatMessage) for message in messages) + ): + msg = f"The model {self.model} requires a list of ChatMessage objects as a prompt." + raise ValueError(msg) + + body = self.model_adapter.prepare_body(messages=messages, stop_words=self.stop_words, **kwargs) + try: + if self.streaming_callback: + response = self.client.invoke_model_with_response_stream( + body=json.dumps(body), modelId=self.model, accept="application/json", contentType="application/json" + ) + response_stream = response["body"] + responses = self.model_adapter.get_stream_responses( + stream=response_stream, stream_handler=self.streaming_callback + ) + else: + response = self.client.invoke_model( + body=json.dumps(body), modelId=self.model, accept="application/json", contentType="application/json" + ) + response_body = json.loads(response.get("body").read().decode("utf-8")) + responses = self.model_adapter.get_responses(response_body=response_body) + except ClientError as exception: + msg = f"Could not inference Amazon Bedrock model {self.model} due: {exception}" + raise AmazonBedrockInferenceError(msg) from exception + + return responses + + @component.output_types(replies=List[str], metadata=List[Dict[str, Any]]) + def run(self, messages: List[ChatMessage], generation_kwargs: Optional[Dict[str, Any]] = None): + return {"replies": self.invoke(messages=messages, **(generation_kwargs or {}))} + + @classmethod + def get_model_adapter(cls, model: str) -> Optional[Type[BedrockModelChatAdapter]]: + for pattern, adapter in cls.SUPPORTED_MODEL_PATTERNS.items(): + if re.fullmatch(pattern, model): + return adapter + return None + + @classmethod + def aws_configured(cls, **kwargs) -> bool: + """ + Checks whether AWS configuration is provided. + :param kwargs: The kwargs passed down to the generator. + :return: True if AWS configuration is provided, False otherwise. + """ + aws_config_provided = any(key in kwargs for key in AWS_CONFIGURATION_KEYS) + return aws_config_provided + + @classmethod + def get_aws_session( + cls, + aws_access_key_id: Optional[str] = None, + aws_secret_access_key: Optional[str] = None, + aws_session_token: Optional[str] = None, + aws_region_name: Optional[str] = None, + aws_profile_name: Optional[str] = None, + **kwargs, + ): + """ + Creates an AWS Session with the given parameters. + Checks if the provided AWS credentials are valid and can be used to connect to AWS. + + :param aws_access_key_id: AWS access key ID. 
+ :param aws_secret_access_key: AWS secret access key. + :param aws_session_token: AWS session token. + :param aws_region_name: AWS region name. + :param aws_profile_name: AWS profile name. + :param kwargs: The kwargs passed down to the service client. Supported kwargs depend on the model chosen. + See https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html. + :raises AWSConfigurationError: If the provided AWS credentials are invalid. + :return: The created AWS session. + """ + try: + return boto3.Session( + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_session_token=aws_session_token, + region_name=aws_region_name, + profile_name=aws_profile_name, + ) + except BotoCoreError as e: + provided_aws_config = {k: v for k, v in kwargs.items() if k in AWS_CONFIGURATION_KEYS} + msg = f"Failed to initialize the session with provided AWS credentials {provided_aws_config}" + raise AWSConfigurationError(msg) from e + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize this component to a dictionary. + :return: The serialized component as a dictionary. + """ + return default_to_dict( + self, + model=self.model, + stop_words=self.stop_words, + generation_kwargs=self.model_adapter.generation_kwargs, + streaming_callback=self.streaming_callback, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "AmazonBedrockChatGenerator": + """ + Deserialize this component from a dictionary. + :param data: The dictionary representation of this component. + :return: The deserialized component instance. + """ + init_params = data.get("init_parameters", {}) + serialized_callback_handler = init_params.get("streaming_callback") + if serialized_callback_handler: + data["init_parameters"]["streaming_callback"] = deserialize_callback_handler(serialized_callback_handler) + return default_from_dict(cls, data) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py index 4c43c9a09..8e89dab59 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py @@ -112,7 +112,7 @@ def __init__( # It is hard to determine which tokenizer to use for the SageMaker model # so we use GPT2 tokenizer which will likely provide good token count approximation self.prompt_handler = DefaultPromptHandler( - model="gpt2", + tokenizer="gpt2", model_max_length=model_max_length, max_length=self.max_length or 100, ) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py index 56dcb24d3..b7b555ec0 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from typing import Dict, Union -from transformers import AutoTokenizer +from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast class DefaultPromptHandler: @@ -10,8 +10,20 @@ class DefaultPromptHandler: are within the model_max_length. 
""" - def __init__(self, model: str, model_max_length: int, max_length: int = 100): - self.tokenizer = AutoTokenizer.from_pretrained(model) + def __init__(self, tokenizer: Union[str, PreTrainedTokenizerBase], model_max_length: int, max_length: int = 100): + """ + :param tokenizer: The tokenizer to be used to tokenize the prompt. + :param model_max_length: The maximum length of the prompt and answer tokens combined. + :param max_length: The maximum length of the answer tokens. + """ + if isinstance(tokenizer, str): + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer) + elif isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)): + self.tokenizer = tokenizer + else: + msg = "model must be a string or a PreTrainedTokenizer instance" + raise ValueError(msg) + self.tokenizer.model_max_length = model_max_length self.model_max_length = model_max_length self.max_length = max_length diff --git a/integrations/amazon_bedrock/tests/test_amazon_chat_bedrock.py b/integrations/amazon_bedrock/tests/test_amazon_chat_bedrock.py new file mode 100644 index 000000000..9592b5b39 --- /dev/null +++ b/integrations/amazon_bedrock/tests/test_amazon_chat_bedrock.py @@ -0,0 +1,250 @@ +from typing import Optional, Type +from unittest.mock import MagicMock, patch + +import pytest +from haystack.components.generators.utils import print_streaming_chunk +from haystack.dataclasses import ChatMessage + +from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockChatGenerator +from haystack_integrations.components.generators.amazon_bedrock.chat.adapters import ( + AnthropicClaudeChatAdapter, + BedrockModelChatAdapter, + MetaLlama2ChatAdapter, +) + +clazz = "haystack_integrations.components.generators.amazon_bedrock.chat.chat_generator.AmazonBedrockChatGenerator" + + +@pytest.fixture +def mock_auto_tokenizer(): + with patch("transformers.AutoTokenizer.from_pretrained", autospec=True) as mock_from_pretrained: + mock_tokenizer = MagicMock() + mock_from_pretrained.return_value = mock_tokenizer + yield mock_tokenizer + + +# create a fixture with mocked boto3 client and session +@pytest.fixture +def mock_boto3_session(): + with patch("boto3.Session") as mock_client: + yield mock_client + + +@pytest.fixture +def mock_prompt_handler(): + with patch( + "haystack_integrations.components.generators.amazon_bedrock.handlers.DefaultPromptHandler" + ) as mock_prompt_handler: + yield mock_prompt_handler + + +def test_to_dict(mock_auto_tokenizer, mock_boto3_session): + """ + Test that the to_dict method returns the correct dictionary without aws credentials + """ + generator = AmazonBedrockChatGenerator( + model="anthropic.claude-v2", + aws_access_key_id="some_fake_id", + aws_secret_access_key="some_fake_key", + aws_session_token="some_fake_token", + aws_profile_name="some_fake_profile", + aws_region_name="fake_region", + generation_kwargs={"temperature": 0.7}, + streaming_callback=print_streaming_chunk, + ) + expected_dict = { + "type": clazz, + "init_parameters": { + "model": "anthropic.claude-v2", + "generation_kwargs": {"temperature": 0.7}, + "stop_words": [], + "streaming_callback": print_streaming_chunk, + }, + } + + assert generator.to_dict() == expected_dict + + +def test_from_dict(mock_auto_tokenizer, mock_boto3_session): + """ + Test that the from_dict method returns the correct object + """ + generator = AmazonBedrockChatGenerator.from_dict( + { + "type": clazz, + "init_parameters": { + "model": "anthropic.claude-v2", + "generation_kwargs": {"temperature": 0.7}, + "streaming_callback": 
"haystack.components.generators.utils.print_streaming_chunk", + }, + } + ) + assert generator.model == "anthropic.claude-v2" + assert generator.model_adapter.generation_kwargs == {"temperature": 0.7} + assert generator.streaming_callback == print_streaming_chunk + + +def test_default_constructor(mock_auto_tokenizer, mock_boto3_session): + """ + Test that the default constructor sets the correct values + """ + + layer = AmazonBedrockChatGenerator( + model="anthropic.claude-v2", + aws_access_key_id="some_fake_id", + aws_secret_access_key="some_fake_key", + aws_session_token="some_fake_token", + aws_profile_name="some_fake_profile", + aws_region_name="fake_region", + ) + + assert layer.model == "anthropic.claude-v2" + + assert layer.model_adapter.prompt_handler is not None + assert layer.model_adapter.prompt_handler.model_max_length == 100000 + + # assert mocked boto3 client called exactly once + mock_boto3_session.assert_called_once() + + # assert mocked boto3 client was called with the correct parameters + mock_boto3_session.assert_called_with( + aws_access_key_id="some_fake_id", + aws_secret_access_key="some_fake_key", + aws_session_token="some_fake_token", + profile_name="some_fake_profile", + region_name="fake_region", + ) + + +def test_constructor_with_generation_kwargs(mock_auto_tokenizer, mock_boto3_session): + """ + Test that model_kwargs are correctly set in the constructor + """ + generation_kwargs = {"temperature": 0.7} + + layer = AmazonBedrockChatGenerator(model="anthropic.claude-v2", generation_kwargs=generation_kwargs) + assert "temperature" in layer.model_adapter.generation_kwargs + assert layer.model_adapter.generation_kwargs["temperature"] == 0.7 + + +def test_constructor_with_empty_model(): + """ + Test that the constructor raises an error when the model is empty + """ + with pytest.raises(ValueError, match="cannot be None or empty string"): + AmazonBedrockChatGenerator(model="") + + +@pytest.mark.unit +def test_invoke_with_no_kwargs(mock_auto_tokenizer, mock_boto3_session): + """ + Test invoke raises an error if no messages are provided + """ + layer = AmazonBedrockChatGenerator(model="anthropic.claude-v2") + with pytest.raises(ValueError, match="The model anthropic.claude-v2 requires"): + layer.invoke() + + +@pytest.mark.unit +@pytest.mark.parametrize( + "model, expected_model_adapter", + [ + ("anthropic.claude-v1", AnthropicClaudeChatAdapter), + ("anthropic.claude-v2", AnthropicClaudeChatAdapter), + ("anthropic.claude-instant-v1", AnthropicClaudeChatAdapter), + ("anthropic.claude-super-v5", AnthropicClaudeChatAdapter), # artificial + ("meta.llama2-13b-chat-v1", MetaLlama2ChatAdapter), + ("meta.llama2-70b-chat-v1", MetaLlama2ChatAdapter), + ("meta.llama2-130b-v5", MetaLlama2ChatAdapter), # artificial + ("unknown_model", None), + ], +) +def test_get_model_adapter(model: str, expected_model_adapter: Optional[Type[BedrockModelChatAdapter]]): + """ + Test that the correct model adapter is returned for a given model + """ + model_adapter = AmazonBedrockChatGenerator.get_model_adapter(model=model) + assert model_adapter == expected_model_adapter + + +class TestAnthropicClaudeAdapter: + def test_prepare_body_with_default_params(self, mock_auto_tokenizer) -> None: + layer = AnthropicClaudeChatAdapter(generation_kwargs={}) + prompt = "Hello, how are you?" 
+ expected_body = { + "prompt": "\n\nHuman: Hello, how are you?\n\nAssistant: ", + "max_tokens_to_sample": 512, + "stop_sequences": ["\n\nHuman:"], + } + + body = layer.prepare_body([ChatMessage.from_user(prompt)]) + + assert body == expected_body + + def test_prepare_body_with_custom_inference_params(self, mock_auto_tokenizer) -> None: + layer = AnthropicClaudeChatAdapter(generation_kwargs={"temperature": 0.7, "top_p": 0.8, "top_k": 4}) + prompt = "Hello, how are you?" + expected_body = { + "prompt": "\n\nHuman: Hello, how are you?\n\nAssistant: ", + "max_tokens_to_sample": 69, + "stop_sequences": ["\n\nHuman:", "CUSTOM_STOP"], + "temperature": 0.7, + "top_p": 0.8, + "top_k": 5, + } + + body = layer.prepare_body( + [ChatMessage.from_user(prompt)], top_p=0.8, top_k=5, max_tokens_to_sample=69, stop_sequences=["CUSTOM_STOP"] + ) + + assert body == expected_body + + +class TestMetaLlama2ChatAdapter: + @pytest.mark.integration + def test_prepare_body_with_default_params(self) -> None: + # leave this test as integration because we really need only tokenizer from HF + # that way we can ensure prompt chat message formatting + layer = MetaLlama2ChatAdapter(generation_kwargs={}) + prompt = "Hello, how are you?" + expected_body = {"prompt": "[INST] Hello, how are you? [/INST]", "max_gen_len": 512} + + body = layer.prepare_body([ChatMessage.from_user(prompt)]) + + assert body == expected_body + + @pytest.mark.integration + def test_prepare_body_with_custom_inference_params(self) -> None: + # leave this test as integration because we really need only tokenizer from HF + # that way we can ensure prompt chat message formatting + layer = MetaLlama2ChatAdapter( + generation_kwargs={"temperature": 0.7, "top_p": 0.8, "top_k": 5, "stop_sequences": ["CUSTOM_STOP"]} + ) + prompt = "Hello, how are you?" + expected_body = { + "prompt": "[INST] Hello, how are you? [/INST]", + "max_gen_len": 69, + "stop_sequences": ["CUSTOM_STOP"], + "temperature": 0.7, + "top_p": 0.8, + "top_k": 5, + } + + body = layer.prepare_body( + [ChatMessage.from_user(prompt)], + temperature=0.7, + top_p=0.8, + top_k=5, + max_gen_len=69, + stop_sequences=["CUSTOM_STOP"], + ) + + assert body == expected_body + + @pytest.mark.integration + def test_get_responses(self) -> None: + adapter = MetaLlama2ChatAdapter(generation_kwargs={}) + response_body = {"generation": "This is a single response."} + expected_response = "This is a single response." 
+ response_message = adapter.get_responses(response_body) + assert response_message == [ChatMessage.from_assistant(expected_response)] From ff122ab3d728377fe8f874b39fe9c74aceb6e9e6 Mon Sep 17 00:00:00 2001 From: Madeesh Kannan Date: Thu, 8 Feb 2024 15:00:08 +0100 Subject: [PATCH 08/12] ci: Generate API docs for Qdrant (#361) --- .github/workflows/qdrant.yml | 42 ++++++++++-------- integrations/qdrant/pydoc/config.yml | 32 ++++++++++++++ integrations/qdrant/pyproject.toml | 66 ++++++++++------------------ 3 files changed, 78 insertions(+), 62 deletions(-) create mode 100644 integrations/qdrant/pydoc/config.yml diff --git a/.github/workflows/qdrant.yml b/.github/workflows/qdrant.yml index 2bbf4f63a..9f031031f 100644 --- a/.github/workflows/qdrant.yml +++ b/.github/workflows/qdrant.yml @@ -7,8 +7,8 @@ on: - cron: "0 0 * * *" pull_request: paths: - - 'integrations/qdrant/**' - - '.github/workflows/qdrant.yml' + - "integrations/qdrant/**" + - ".github/workflows/qdrant.yml" defaults: run: @@ -30,27 +30,31 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ['3.9', '3.10'] + python-version: ["3.9", "3.10"] steps: - - name: Support longpaths - if: matrix.os == 'windows-latest' - working-directory: . - run: git config --system core.longpaths true + - name: Support longpaths + if: matrix.os == 'windows-latest' + working-directory: . + run: git config --system core.longpaths true - - uses: actions/checkout@v4 + - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} - - name: Install Hatch - run: pip install --upgrade hatch + - name: Install Hatch + run: pip install --upgrade hatch - - name: Lint - if: matrix.python-version == '3.9' && runner.os == 'Linux' - run: hatch run lint:all + - name: Lint + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run lint:all - - name: Run tests - run: hatch run cov + - name: Generate docs + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + + - name: Run tests + run: hatch run cov diff --git a/integrations/qdrant/pydoc/config.yml b/integrations/qdrant/pydoc/config.yml new file mode 100644 index 000000000..94c1ca50f --- /dev/null +++ b/integrations/qdrant/pydoc/config.yml @@ -0,0 +1,32 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: + [ + "haystack_integrations.components.retrievers.qdrant.retriever", + "haystack_integrations.document_stores.qdrant.document_store", + "haystack_integrations.document_stores.qdrant.converters", + "haystack_integrations.document_stores.qdrant.filters", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: Qdrant integration for Haystack + category_slug: haystack-integrations + title: Qdrant + slug: integrations-qdrant + order: 160 + markdown: + descriptive_class_title: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_qdrant.md diff --git a/integrations/qdrant/pyproject.toml b/integrations/qdrant/pyproject.toml index 
9c19d144e..58a3534c4 100644 --- a/integrations/qdrant/pyproject.toml +++ b/integrations/qdrant/pyproject.toml @@ -25,10 +25,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = [ - "haystack-ai", - "qdrant-client", -] +dependencies = ["haystack-ai", "qdrant-client"] [project.urls] Source = "https://github.com/deepset-ai/haystack-core-integrations" @@ -47,47 +44,25 @@ root = "../.." git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*"' [tool.hatch.envs.default] -dependencies = [ - "coverage[toml]>=6.5", - "pytest", -] +dependencies = ["coverage[toml]>=6.5", "pytest", "haystack-pydoc-tools"] [tool.hatch.envs.default.scripts] test = "pytest {args:tests}" test-cov = "coverage run -m pytest {args:tests}" -cov-report = [ - "- coverage combine", - "coverage report", -] -cov = [ - "test-cov", - "cov-report", -] +cov-report = ["- coverage combine", "coverage report"] +cov = ["test-cov", "cov-report"] +docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.7", "3.8", "3.9", "3.10", "3.11"] [tool.hatch.envs.lint] detached = true -dependencies = [ - "black>=23.1.0", - "mypy>=1.0.0", - "ruff>=0.0.243", -] +dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"] [tool.hatch.envs.lint.scripts] typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}" -style = [ - "ruff {args:.}", - "black --check --diff {args:.}", -] -fmt = [ - "black {args:.}", - "ruff --fix {args:.}", - "style", -] -all = [ - "style", - "typing", -] +style = ["ruff {args:.}", "black --check --diff {args:.}"] +fmt = ["black {args:.}", "ruff --fix {args:.}", "style"] +all = ["style", "typing"] [tool.black] target-version = ["py37"] @@ -130,9 +105,15 @@ ignore = [ # Allow boolean positional values in function calls, like `dict.get(... 
True)` "FBT003", # Ignore checks for possible passwords - "S105", "S106", "S107", + "S105", + "S106", + "S107", # Ignore complexity - "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915", + "C901", + "PLR0911", + "PLR0912", + "PLR0913", + "PLR0915", ] unfixable = [ # Don't touch unused imports @@ -153,15 +134,14 @@ parallel = true [tool.coverage.paths] -qdrant_haystack = ["src/qdrant_haystack", "*/qdrant-haystack/src/qdrant_haystack"] +qdrant_haystack = [ + "src/qdrant_haystack", + "*/qdrant-haystack/src/qdrant_haystack", +] tests = ["tests", "*/qdrant-haystack/tests"] [tool.coverage.report] -exclude_lines = [ - "no cov", - "if __name__ == .__main__.:", - "if TYPE_CHECKING:", -] +exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] [[tool.mypy.overrides]] module = [ @@ -170,6 +150,6 @@ module = [ "pytest.*", "qdrant_client.*", "numpy", - "grpc" + "grpc", ] ignore_missing_imports = true From 52508a69469cbfcf3aef8e170b76303adf3d9013 Mon Sep 17 00:00:00 2001 From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Date: Thu, 8 Feb 2024 17:07:01 +0100 Subject: [PATCH 09/12] feat: Generate API docs (#380) * Adds docs generation in instructor_embedders * Trigger CI * Update config.yml * Remove CI trigger --- .github/workflows/instructor_embedders.yml | 32 ++++++++------ .../instructor_embedders/pydoc/config.yml | 30 +++++++++++++ .../instructor_embedders/pyproject.toml | 44 ++++++++----------- 3 files changed, 66 insertions(+), 40 deletions(-) create mode 100644 integrations/instructor_embedders/pydoc/config.yml diff --git a/.github/workflows/instructor_embedders.yml b/.github/workflows/instructor_embedders.yml index 4145408e2..09d04e9d3 100644 --- a/.github/workflows/instructor_embedders.yml +++ b/.github/workflows/instructor_embedders.yml @@ -5,8 +5,8 @@ on: - cron: "0 0 * * *" pull_request: paths: - - 'integrations/instructor_embedders/**' - - '.github/workflows/instructor_embedders.yml' + - "integrations/instructor_embedders/**" + - ".github/workflows/instructor_embedders.yml" defaults: run: @@ -16,19 +16,23 @@ jobs: test: runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@v4 + - name: Checkout + uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" - - name: Install Hatch - run: pip install --upgrade hatch + - name: Install Hatch + run: pip install --upgrade hatch - - name: Lint - run: hatch run lint:all + - name: Lint + run: hatch run lint:all - - name: Run tests - run: hatch run cov + - name: Generate docs + if: runner.os == 'Linux' + run: hatch run docs + + - name: Run tests + run: hatch run cov diff --git a/integrations/instructor_embedders/pydoc/config.yml b/integrations/instructor_embedders/pydoc/config.yml new file mode 100644 index 000000000..cc16a72f7 --- /dev/null +++ b/integrations/instructor_embedders/pydoc/config.yml @@ -0,0 +1,30 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: + [ + "haystack_integrations.components.embedders.instructor_embedders", + "haystack_integrations.components.embedders.instructor_embedders.embedding_backend", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: 
Embedders integration for Haystack + category_slug: haystack-integrations + title: Embedders + slug: integrations-instructor-embedders + order: 90 + markdown: + descriptive_class_title: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_instructor_embedders.md diff --git a/integrations/instructor_embedders/pyproject.toml b/integrations/instructor_embedders/pyproject.toml index c8a591b69..24e69ce59 100644 --- a/integrations/instructor_embedders/pyproject.toml +++ b/integrations/instructor_embedders/pyproject.toml @@ -10,9 +10,7 @@ readme = "README.md" requires-python = ">=3.7" license = "Apache-2.0" keywords = [] -authors = [ - { name = "deepset GmbH", email = "info@deepset.ai" }, -] +authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }] classifiers = [ "Development Status :: 4 - Beta", "Programming Language :: Python", @@ -43,7 +41,7 @@ dependencies = [ "tqdm", "rich", - "InstructorEmbedding" + "InstructorEmbedding", ] [project.optional-dependencies] @@ -66,38 +64,26 @@ root = "../.." git_describe_command = 'git describe --tags --match="integrations/instructor_embedders-v[0-9]*"' [tool.hatch.envs.default] -dependencies = ["pytest", "pytest-cov"] +dependencies = ["pytest", "pytest-cov", "haystack-pydoc-tools"] [tool.hatch.envs.default.scripts] cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=instructor-embedders --cov=tests" no-cov = "cov --no-cov" test = "pytest {args:tests}" +docs = "pydoc-markdown pydoc/config.yml" [[tool.hatch.envs.test.matrix]] python = ["38", "39", "310", "311"] [tool.hatch.envs.lint] detached = true -dependencies = [ - "black>=23.1.0", - "mypy>=1.0.0", - "ruff>=0.0.243", -] +dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"] + [tool.hatch.envs.lint.scripts] typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}" -style = [ - "ruff {args:.}", - "black --check --diff {args:.}", -] -fmt = [ - "black {args:.}", - "ruff --fix {args:.}", - "style", -] -all = [ - "style", - "typing", -] +style = ["ruff {args:.}", "black --check --diff {args:.}"] +fmt = ["black {args:.}", "ruff --fix {args:.}", "style"] +all = ["style", "typing"] [tool.coverage.run] branch = true @@ -141,9 +127,15 @@ ignore = [ # Allow boolean positional values in function calls, like `dict.get(... 
True)` "FBT003", # Ignore checks for possible passwords - "S105", "S106", "S107", + "S105", + "S106", + "S107", # Ignore complexity - "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915", + "C901", + "PLR0911", + "PLR0912", + "PLR0913", + "PLR0915", ] unfixable = [ # Don't touch unused imports @@ -178,4 +170,4 @@ module = [ "pytest.*", "numpy.*", ] -ignore_missing_imports = true \ No newline at end of file +ignore_missing_imports = true From 77d49363917390be13cbcd11d62c8cd8c8446f6e Mon Sep 17 00:00:00 2001 From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Date: Thu, 8 Feb 2024 18:19:19 +0100 Subject: [PATCH 10/12] feat: Generate API docs for Jina (#381) * Adds docs generation in jina * Add missing dependency * Fix docs command --- .github/workflows/jina.yml | 42 ++++++++++---------- integrations/jina/pydoc/config.yml | 26 +++++++++++++ integrations/jina/pyproject.toml | 61 ++++++++++-------------------- 3 files changed, 68 insertions(+), 61 deletions(-) create mode 100644 integrations/jina/pydoc/config.yml diff --git a/.github/workflows/jina.yml b/.github/workflows/jina.yml index 894456877..1f8e83a7d 100644 --- a/.github/workflows/jina.yml +++ b/.github/workflows/jina.yml @@ -7,8 +7,8 @@ on: - cron: "0 0 * * *" pull_request: paths: - - 'integrations/jina/**' - - '.github/workflows/jina.yml' + - "integrations/jina/**" + - ".github/workflows/jina.yml" defaults: run: @@ -30,27 +30,31 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ['3.9', '3.10'] + python-version: ["3.9", "3.10"] steps: - - name: Support longpaths - if: matrix.os == 'windows-latest' - working-directory: . - run: git config --system core.longpaths true + - name: Support longpaths + if: matrix.os == 'windows-latest' + working-directory: . 
+ run: git config --system core.longpaths true - - uses: actions/checkout@v4 + - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} - - name: Install Hatch - run: pip install --upgrade hatch + - name: Install Hatch + run: pip install --upgrade hatch - - name: Lint - if: matrix.python-version == '3.9' && runner.os == 'Linux' - run: hatch run lint:all + - name: Lint + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run lint:all - - name: Run tests - run: hatch run cov \ No newline at end of file + - name: Generate docs + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + + - name: Run tests + run: hatch run cov diff --git a/integrations/jina/pydoc/config.yml b/integrations/jina/pydoc/config.yml new file mode 100644 index 000000000..8d4943d5f --- /dev/null +++ b/integrations/jina/pydoc/config.yml @@ -0,0 +1,26 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: ["haystack_integrations.components.embedders.jina"] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: Jina integration for Haystack + category_slug: haystack-integrations + title: Jina + slug: integrations-jina + order: 1 + markdown: + descriptive_class_title: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_jina.md diff --git a/integrations/jina/pyproject.toml b/integrations/jina/pyproject.toml index 1136db797..565def7a5 100644 --- a/integrations/jina/pyproject.toml +++ b/integrations/jina/pyproject.toml @@ -43,47 +43,26 @@ root = "../.." 
git_describe_command = 'git describe --tags --match="integrations/jina-v[0-9]*"' [tool.hatch.envs.default] -dependencies = [ - "coverage[toml]>=6.5", - "pytest", -] +dependencies = ["coverage[toml]>=6.5", "pytest", "haystack-pydoc-tools"] [tool.hatch.envs.default.scripts] test = "pytest {args:tests}" test-cov = "coverage run -m pytest {args:tests}" -cov-report = [ - "- coverage combine", - "coverage report", -] -cov = [ - "test-cov", - "cov-report", -] +cov-report = ["- coverage combine", "coverage report"] +cov = ["test-cov", "cov-report"] +docs = ["pydoc-markdown pydoc/config.yml"] + [[tool.hatch.envs.all.matrix]] python = ["3.7", "3.8", "3.9", "3.10", "3.11"] [tool.hatch.envs.lint] detached = true -dependencies = [ - "black>=23.1.0", - "mypy>=1.0.0", - "ruff>=0.0.243", -] +dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"] [tool.hatch.envs.lint.scripts] typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}" -style = [ - "ruff {args:.}", - "black --check --diff {args:.}", -] -fmt = [ - "black {args:.}", - "ruff --fix {args:.}", - "style", -] -all = [ - "style", - "typing", -] +style = ["ruff {args:.}", "black --check --diff {args:.}"] +fmt = ["black {args:.}", "ruff --fix {args:.}", "style"] +all = ["style", "typing"] [tool.black] target-version = ["py37"] @@ -123,9 +102,15 @@ ignore = [ # Allow non-abstract empty methods in abstract base classes "B027", # Ignore checks for possible passwords - "S105", "S106", "S107", + "S105", + "S106", + "S107", # Ignore complexity - "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915", + "C901", + "PLR0911", + "PLR0912", + "PLR0913", + "PLR0915", ] unfixable = [ # Don't touch unused imports @@ -152,16 +137,8 @@ jina_haystack = ["src"] tests = ["tests", "*/jina-haystack/tests"] [tool.coverage.report] -exclude_lines = [ - "no cov", - "if __name__ == .__main__.:", - "if TYPE_CHECKING:", -] +exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] [[tool.mypy.overrides]] -module = [ - "haystack.*", - "haystack_integrations.*", - "pytest.*" -] +module = ["haystack.*", "haystack_integrations.*", "pytest.*"] ignore_missing_imports = true From 37723696e892b9840b36ce2820ca7ca566defa98 Mon Sep 17 00:00:00 2001 From: Daria Fokina Date: Thu, 8 Feb 2024 19:11:47 +0100 Subject: [PATCH 11/12] gradient: generate api docs (#352) --- .github/workflows/gradient.yml | 4 ++++ integrations/gradient/pydoc/config.yml | 30 ++++++++++++++++++++++++++ integrations/gradient/pyproject.toml | 5 ++++- 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 integrations/gradient/pydoc/config.yml diff --git a/.github/workflows/gradient.yml b/.github/workflows/gradient.yml index 8bab11d39..61a04be7b 100644 --- a/.github/workflows/gradient.yml +++ b/.github/workflows/gradient.yml @@ -52,5 +52,9 @@ jobs: if: matrix.python-version == '3.9' && runner.os == 'Linux' run: hatch run lint:all + - name: Generate docs + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + - name: Run tests run: hatch run cov \ No newline at end of file diff --git a/integrations/gradient/pydoc/config.yml b/integrations/gradient/pydoc/config.yml new file mode 100644 index 000000000..6ffae5f28 --- /dev/null +++ b/integrations/gradient/pydoc/config.yml @@ -0,0 +1,30 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: [ + "haystack_integrations.components.embedders.gradient.gradient_document_embedder", + 
"haystack_integrations.components.embedders.gradient.gradient_text_embedder", + "haystack_integrations.components.generators.gradient.base", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: Cohere integration for Haystack + category_slug: haystack-integrations + title: Gradient + slug: integrations-gradient + order: 80 + markdown: + descriptive_class_title: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_gradient.md \ No newline at end of file diff --git a/integrations/gradient/pyproject.toml b/integrations/gradient/pyproject.toml index 22140bba5..51919f149 100644 --- a/integrations/gradient/pyproject.toml +++ b/integrations/gradient/pyproject.toml @@ -52,6 +52,7 @@ git_describe_command = 'git describe --tags --match="integrations/gradient-v[0-9 dependencies = [ "coverage[toml]>=6.5", "pytest", + "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] test = "pytest {args:tests}" @@ -64,7 +65,9 @@ cov = [ "test-cov", "cov-report", ] - +docs = [ + "pydoc-markdown pydoc/config.yml" +] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11"] From e49d52d5550e710cf4db20dbc3626970c4510afd Mon Sep 17 00:00:00 2001 From: Massimiliano Pippi Date: Fri, 9 Feb 2024 12:58:55 +0100 Subject: [PATCH 12/12] CI: sync API docs with readme.com (#266) * first draft test commit, to be reverted Revert "test commit, to be reverted" This reverts commit b9997f7269ab79ea73347ccc763e33f28cf0691b. * test commit, to be reverted fine tune action settings try try try try * backup * revert * test commit * install hatch * test commit * uniform workflow name * skip step if nothing changed * sync with readme * revert * final touches * Update .github/workflows/CI_readme_sync.yml Co-authored-by: Madeesh Kannan * test workflow, revert before merge * revert script optimization * Revert "test workflow, revert before merge" This reverts commit 3538b01b14ea3354fba4bd49ef1ed57f63f884a5. * test * reintroduce optimization * to be reverted * revert * try * test * test * Update CI_readme_sync.yml --------- Co-authored-by: Madeesh Kannan --- .github/workflows/CI_readme_sync.yml | 64 ++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 .github/workflows/CI_readme_sync.yml diff --git a/.github/workflows/CI_readme_sync.yml b/.github/workflows/CI_readme_sync.yml new file mode 100644 index 000000000..18cb6378f --- /dev/null +++ b/.github/workflows/CI_readme_sync.yml @@ -0,0 +1,64 @@ +name: Core / Sync docs with Readme + +on: + push: + branches: + - main + +jobs: + sync: + runs-on: ubuntu-latest + steps: + - name: Checkout this repo + uses: actions/checkout@v4 + with: + # This will tell tj-actions/changed-files to compare the current pushed commit with the latest in main + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -U haystack-pydoc-tools hatch + + # We look into the changeset in order to understand + # which integrations were modified by the last commit. 
+ - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@v42 + with: + files: integrations/** + # Output unique changed directories instead of filenames + dir_names: true + # We only care about the name of the integration, i.e. integrations/FOO + dir_names_max_depth: 2 + + - name: Generate docs + if: steps.changed-files.outputs.all_changed_files != '' + env: + # This is necessary to fetch the documentation categories + # from Readme.io as we need them to associate the slug + # in config files with their id. + README_API_KEY: ${{ secrets.README_API_KEY }} + ALL_CHANGED_DIRS: ${{ steps.changed-files.outputs.all_changed_files }} + run: | + for d in $ALL_CHANGED_DIRS; do + cd $d + hatch run docs + cd - + done + mkdir tmp + find . -name "_readme_*.md" -exec cp "{}" tmp \; + ls tmp + + - name: Sync preview docs with 2.0 + if: steps.changed-files.outputs.all_changed_files != '' + uses: readmeio/rdme@8.3.1 + env: + README_API_KEY: ${{ secrets.README_API_KEY }} + with: + rdme: docs ./tmp --key="$README_API_KEY" --version=2.0
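
For reference, the generate-and-sync sequence that the patches above wire into CI can be approximated locally. The sketch below is an illustration only, not part of any patch: it assumes hatch and the rdme CLI (run here via npx) are installed, that README_API_KEY is exported in the shell, and that the two integration directories are stand-ins for whatever tj-actions/changed-files would report in CI.

    #!/usr/bin/env bash
    # Local sketch of the docs pipeline added in this patch series.
    # Assumptions: hatch is installed, rdme is available through npx,
    # and README_API_KEY is exported (pydoc generation uses it to look
    # up Readme.io categories, and the sync step uses it to push).
    set -euo pipefail

    # Directories are illustrative; in CI they come from tj-actions/changed-files.
    for d in integrations/qdrant integrations/jina; do
        # "hatch run docs" resolves to "pydoc-markdown pydoc/config.yml"
        # in each integration's pyproject.toml.
        (cd "$d" && hatch run docs)
    done

    # Collect the rendered previews, as the workflow does.
    mkdir -p tmp
    find . -name "_readme_*.md" -exec cp "{}" tmp \;

    # Push the previews to the 2.0 docs version on readme.com.
    npx rdme@8.3.1 docs ./tmp --key="$README_API_KEY" --version=2.0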