Merge branch 'main' into feature/chroma-query-text-retriever-filters
masci authored May 10, 2024
2 parents 7b2ea59 + c29db9c commit 16ac556
Showing 76 changed files with 1,005 additions and 327 deletions.
32 changes: 29 additions & 3 deletions .github/workflows/CI_readme_sync.yml
@@ -4,6 +4,7 @@ on:
push:
tags:
- "**-v[0-9].[0-9]+.[0-9]+"

workflow_dispatch: # Activate this workflow manually
inputs:
tag:
@@ -16,8 +17,30 @@ env:
TAG: ${{ inputs.tag || github.ref_name }}

jobs:
get-versions:
runs-on: ubuntu-latest
outputs:
versions: ${{ steps.version_finder.outputs.versions }}
steps:
- name: Get Haystack Docs versions
id: version_finder
run: |
curl -s https://dash.readme.com/api/v1/version --header 'authorization: Basic ${{ secrets.README_API_KEY }}' > out
VERSIONS=$(jq '[ .[] | select(.version | startswith("2."))| .version ]' out)
{
echo 'versions<<EOF'
echo $VERSIONS
echo EOF
} >> "$GITHUB_OUTPUT"
sync:
runs-on: ubuntu-latest
needs: get-versions
strategy:
fail-fast: false
max-parallel: 1
matrix:
hs-docs-version: ${{ fromJSON(needs.get-versions.outputs.versions) }}
steps:
- name: Checkout this repo
uses: actions/checkout@v4
@@ -39,7 +62,7 @@ jobs:
import os
project_path = os.environ["TAG"].rsplit("-", maxsplit=1)[0]
with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
print(f'project_path={project_path}', file=f)
print(f'project_path={project_path}', file=f)
- name: Generate docs
working-directory: ${{ steps.pathfinder.outputs.project_path }}
@@ -48,13 +71,16 @@
# from Readme.io as we need them to associate the slug
# in config files with their id.
README_API_KEY: ${{ secrets.README_API_KEY }}
# The same category has a different id on different readme docs versions.
# This is the docs version on readme that we'll use to get the category id.
PYDOC_TOOLS_HAYSTACK_DOC_VERSION: ${{ matrix.hs-docs-version }}
run: |
hatch run docs
mkdir tmp
find . -name "_readme_*.md" -exec cp "{}" tmp \;
ls tmp
- name: Sync API docs
- name: Sync API docs with Haystack docs version ${{ matrix.hs-docs-version }}
uses: readmeio/rdme@v8
with:
rdme: docs ${{ steps.pathfinder.outputs.project_path }}/tmp --key=${{ secrets.README_API_KEY }} --version=2.0
rdme: docs ${{ steps.pathfinder.outputs.project_path }}/tmp --key=${{ secrets.README_API_KEY }} --version=${{ matrix.hs-docs-version }}
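
For reference, the new get-versions job filters ReadMe doc versions with jq before feeding them into the sync matrix. A minimal Python sketch of the same filtering, assuming the ReadMe /version endpoint returns a list of objects with a "version" field (illustrative only, not one of the changed files):

import json

# Hypothetical response shape for the ReadMe /version endpoint.
raw = '[{"version": "1.26"}, {"version": "2.0"}, {"version": "2.1"}]'

# Mirrors: jq '[ .[] | select(.version | startswith("2."))| .version ]'
versions = [item["version"] for item in json.loads(raw) if item["version"].startswith("2.")]

print(versions)  # ['2.0', '2.1'] -- this JSON array is what fromJSON consumes in the matrix
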
3 changes: 2 additions & 1 deletion .github/workflows/nvidia.yml
@@ -22,6 +22,7 @@ env:
PYTHONUNBUFFERED: "1"
FORCE_COLOR: "1"
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
NVIDIA_CATALOG_API_KEY: ${{ secrets.NVIDIA_CATALOG_API_KEY }}

jobs:
run:
@@ -73,7 +74,7 @@ jobs:
uses: ./.github/actions/send_failure
with:
title: |
core-integrations failure:
core-integrations failure:
${{ (steps.tests.conclusion == 'nightly-haystack-main') && 'nightly-haystack-main' || 'tests' }}
- ${{ github.workflow }}
api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
2 changes: 1 addition & 1 deletion README.md
@@ -51,5 +51,5 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta
| [qdrant-haystack](integrations/qdrant/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/qdrant-haystack.svg?color=orange)](https://pypi.org/project/qdrant-haystack) | [![Test / qdrant](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/qdrant.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/qdrant.yml) |
| [ragas-haystack](integrations/ragas/) | Evaluator | [![PyPI - Version](https://img.shields.io/pypi/v/ragas-haystack.svg)](https://pypi.org/project/ragas-haystack) | [![Test / ragas](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ragas.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ragas.yml) |
| [unstructured-fileconverter-haystack](integrations/unstructured/) | File converter | [![PyPI - Version](https://img.shields.io/pypi/v/unstructured-fileconverter-haystack.svg)](https://pypi.org/project/unstructured-fileconverter-haystack) | [![Test / unstructured](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/unstructured.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/unstructured.yml) |
| [uptrain-haystack](https://github.com/deepset-ai/haystack-core-integrations/tree/staging/integrations/uptrain) | Evaluator | [![PyPI - Version](https://img.shields.io/pypi/v/uptrain-haystack.svg)](https://pypi.org/project/uptrain-haystack) | Staged |
| [uptrain-haystack](https://github.com/deepset-ai/haystack-core-integrations/tree/staging/integrations/uptrain) | Evaluator | [![PyPI - Version](https://img.shields.io/pypi/v/uptrain-haystack.svg)](https://pypi.org/project/uptrain-haystack) | [Staged](https://docs.haystack.deepset.ai/docs/breaking-change-policy#discontinuing-an-integration) |
| [weaviate-haystack](integrations/weaviate/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/weaviate-haystack.svg)](https://pypi.org/project/weaviate-haystack) | [![Test / weaviate](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weaviate.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weaviate.yml) |
2 changes: 1 addition & 1 deletion integrations/amazon_bedrock/pydoc/config.yml
@@ -20,7 +20,7 @@ processors:
- type: smart
- type: crossref
renderer:
type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
excerpt: Amazon Bedrock integration for Haystack
category_slug: integrations-api
title: Amazon Bedrock
2 changes: 1 addition & 1 deletion integrations/amazon_sagemaker/pydoc/config.yml
@@ -14,7 +14,7 @@ processors:
- type: smart
- type: crossref
renderer:
type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
excerpt: Amazon Sagemaker integration for Haystack
category_slug: integrations-api
title: Amazon Sagemaker
4 changes: 2 additions & 2 deletions integrations/anthropic/pydoc/config.yml
@@ -15,12 +15,12 @@ processors:
- type: smart
- type: crossref
renderer:
type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
excerpt: Anthropic integration for Haystack
category_slug: integrations-api
title: Anthropic
slug: integrations-anthropic
order: 22
order: 23
markdown:
descriptive_class_title: false
descriptive_module_title: true
2 changes: 1 addition & 1 deletion integrations/astra/pydoc/config.yml
@@ -16,7 +16,7 @@ processors:
- type: smart
- type: crossref
renderer:
type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
excerpt: Astra integration for Haystack
category_slug: integrations-api
title: Astra
18 changes: 9 additions & 9 deletions integrations/astra/pyproject.toml
@@ -80,12 +80,12 @@ dependencies = [
[tool.hatch.envs.lint.scripts]
typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
style = [
"ruff {args:.}",
"ruff check {args:.}",
"black --check --diff {args:.}",
]
fmt = [
"black {args:.}",
"ruff --fix {args:.}",
"ruff check --fix {args:.}",
"style",
]
all = [
@@ -104,7 +104,7 @@ skip-string-normalization = true
[tool.ruff]
target-version = "py38"
line-length = 120
select = [
lint.select = [
"A",
"ARG",
"B",
@@ -131,7 +131,7 @@ select = [
"W",
"YTT",
]
ignore = [
lint.ignore = [
# Allow non-abstract empty methods in abstract base classes
"B027",
# Allow boolean positional values in function calls, like `dict.get(... True)`
@@ -141,19 +141,19 @@ ignore = [
# Ignore complexity
"C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915",
]
unfixable = [
lint.unfixable = [
# Don't touch unused imports
"F401",
]
exclude = ["example"]
lint.exclude = ["example"]

[tool.ruff.isort]
[tool.ruff.lint.isort]
known-first-party = ["haystack_integrations"]

[tool.ruff.flake8-tidy-imports]
[tool.ruff.lint.flake8-tidy-imports]
ban-relative-imports = "parents"

[tool.ruff.per-file-ignores]
[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252"]

10 changes: 8 additions & 2 deletions integrations/astra/tests/test_document_store.py
@@ -14,7 +14,7 @@
from haystack_integrations.document_stores.astra import AstraDocumentStore


def test_namespace_init():
@pytest.fixture
def mock_auth(monkeypatch):
monkeypatch.setenv("ASTRA_DB_API_ENDPOINT", "http://example.com")
monkeypatch.setenv("ASTRA_DB_APPLICATION_TOKEN", "test_token")


def test_namespace_init(mock_auth): # noqa
with mock.patch("haystack_integrations.document_stores.astra.astra_client.AstraDB") as client:
AstraDocumentStore()
assert "namespace" in client.call_args.kwargs
@@ -25,7 +31,7 @@ def test_namespace_init():
assert client.call_args.kwargs["namespace"] == "foo"


def test_to_dict():
def test_to_dict(mock_auth): # noqa
with mock.patch("haystack_integrations.document_stores.astra.astra_client.AstraDB"):
ds = AstraDocumentStore()
result = ds.to_dict()
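
For reference, the mock_auth fixture added above relies on pytest's monkeypatch to scope the Astra credentials to each test. A self-contained sketch of that pattern with placeholder values (illustrative only, not one of the changed files):

import os

import pytest


@pytest.fixture
def mock_auth(monkeypatch):
    # monkeypatch restores the original environment after each test.
    monkeypatch.setenv("ASTRA_DB_API_ENDPOINT", "http://example.com")
    monkeypatch.setenv("ASTRA_DB_APPLICATION_TOKEN", "test_token")


def test_env_is_mocked(mock_auth):
    # Code under test that reads these variables sees the mocked values.
    assert os.environ["ASTRA_DB_APPLICATION_TOKEN"] == "test_token"
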
2 changes: 1 addition & 1 deletion integrations/chroma/pydoc/config.yml
@@ -17,7 +17,7 @@ processors:
- type: smart
- type: crossref
renderer:
type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
excerpt: Chroma integration for Haystack
category_slug: integrations-api
title: Chroma
2 changes: 1 addition & 1 deletion integrations/cohere/pydoc/config.yml
@@ -19,7 +19,7 @@ processors:
- type: smart
- type: crossref
renderer:
type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
excerpt: Cohere integration for Haystack
category_slug: integrations-api
title: Cohere
@@ -42,7 +42,6 @@ def __init__(
api_base_url: str = "https://api.cohere.com",
truncate: str = "END",
use_async_client: bool = False,
max_retries: int = 3,
timeout: int = 120,
batch_size: int = 32,
progress_bar: bool = True,
@@ -67,7 +66,6 @@ def __init__(
If "NONE" is selected, when the input exceeds the maximum input token length an error will be returned.
:param use_async_client: flag to select the AsyncClient. It is recommended to use
AsyncClient for applications with many concurrent calls.
:param max_retries: maximal number of retries for requests.
:param timeout: request timeout in seconds.
:param batch_size: number of Documents to encode at once.
:param progress_bar: whether to show a progress bar or not. Can be helpful to disable in production deployments
@@ -82,7 +80,6 @@ def __init__(
self.api_base_url = api_base_url
self.truncate = truncate
self.use_async_client = use_async_client
self.max_retries = max_retries
self.timeout = timeout
self.batch_size = batch_size
self.progress_bar = progress_bar
@@ -104,7 +101,6 @@ def to_dict(self) -> Dict[str, Any]:
api_base_url=self.api_base_url,
truncate=self.truncate,
use_async_client=self.use_async_client,
max_retries=self.max_retries,
timeout=self.timeout,
batch_size=self.batch_size,
progress_bar=self.progress_bar,
@@ -170,7 +166,6 @@ def run(self, documents: List[Document]):
cohere_client = AsyncClient(
api_key,
base_url=self.api_base_url,
max_retries=self.max_retries,
timeout=self.timeout,
client_name="haystack",
)
@@ -181,7 +176,6 @@ def run(self, documents: List[Document]):
cohere_client = Client(
api_key,
base_url=self.api_base_url,
max_retries=self.max_retries,
timeout=self.timeout,
client_name="haystack",
)
@@ -39,7 +39,6 @@ def __init__(
api_base_url: str = "https://api.cohere.com",
truncate: str = "END",
use_async_client: bool = False,
max_retries: int = 3,
timeout: int = 120,
):
"""
@@ -60,7 +59,6 @@
If "NONE" is selected, when the input exceeds the maximum input token length an error will be returned.
:param use_async_client: flag to select the AsyncClient. It is recommended to use
AsyncClient for applications with many concurrent calls.
:param max_retries: maximum number of retries for requests.
:param timeout: request timeout in seconds.
"""

@@ -70,7 +68,6 @@
self.api_base_url = api_base_url
self.truncate = truncate
self.use_async_client = use_async_client
self.max_retries = max_retries
self.timeout = timeout

def to_dict(self) -> Dict[str, Any]:
@@ -88,7 +85,6 @@ def to_dict(self) -> Dict[str, Any]:
api_base_url=self.api_base_url,
truncate=self.truncate,
use_async_client=self.use_async_client,
max_retries=self.max_retries,
timeout=self.timeout,
)

@@ -132,7 +128,6 @@ def run(self, text: str):
cohere_client = AsyncClient(
api_key,
base_url=self.api_base_url,
max_retries=self.max_retries,
timeout=self.timeout,
client_name="haystack",
)
@@ -143,7 +138,6 @@
cohere_client = Client(
api_key,
base_url=self.api_base_url,
max_retries=self.max_retries,
timeout=self.timeout,
client_name="haystack",
)
@@ -62,7 +62,7 @@ def get_response(
desc="Calculating embeddings",
):
batch = texts[i : i + batch_size]
response = cohere_client.embed(batch, model=model_name, input_type=input_type, truncate=truncate)
response = cohere_client.embed(texts=batch, model=model_name, input_type=input_type, truncate=truncate)
for emb in response.embeddings:
all_embeddings.append(emb)
if response.meta is not None:
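
For reference, the helper above embeds texts in fixed-size batches, and the change passes each batch via the texts= keyword. A simplified, self-contained sketch of that batching pattern with a stand-in embed function (illustrative only, not the Cohere SDK):

from typing import List


def fake_embed(texts: List[str]) -> List[List[float]]:
    # Stand-in for cohere_client.embed(texts=batch, ...); returns dummy vectors.
    return [[float(len(t))] for t in texts]


def embed_in_batches(texts: List[str], batch_size: int = 32) -> List[List[float]]:
    all_embeddings: List[List[float]] = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i : i + batch_size]
        # Passing the batch as a keyword argument mirrors the updated call in the diff.
        all_embeddings.extend(fake_embed(texts=batch))
    return all_embeddings


print(len(embed_in_batches(["hello"] * 70)))  # 70
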