Skip to content

Commit

Permalink
Add Ci + Add PR fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
mounaTay committed Jan 11, 2024
1 parent e718180 commit 9cb8eac
Show file tree
Hide file tree
Showing 20 changed files with 265 additions and 127 deletions.
57 changes: 57 additions & 0 deletions .github/workflows/astra.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# This workflow comes from https://github.com/ofek/hatch-mypyc
# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
name: Test / astra

on:
schedule:
- cron: "0 0 * * *"
pull_request:
paths:
- 'integrations/astra/**'
- '.github/workflows/astra.yml'

defaults:
run:
working-directory: integrations/astra

concurrency:
group: astra-${{ github.head_ref }}
cancel-in-progress: true

env:
PYTHONUNBUFFERED: "1"
FORCE_COLOR: "1"

jobs:
run:
name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
max-parallel: 1
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ['3.9', '3.10']

steps:
- name: Support longpaths
if: matrix.os == 'windows-latest'
working-directory: .
run: git config --system core.longpaths true

- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install Hatch
run: pip install --upgrade hatch

- name: Lint
if: matrix.python-version == '3.9' && runner.os == 'Linux'
run: hatch run lint:all

- name: Run tests
run: hatch run cov
Empty file added integrations/__init__.py
Empty file.
12 changes: 12 additions & 0 deletions integrations/astra/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ Similarly, to run the linters:
hatch run lint:all
```

## Usage

This package provides the `AstraDocumentStore` and `AstraRetriever` classes, which integrate with Haystack 2.0 so that you can perform document retrieval or retrieval-augmented generation (RAG) with Astra and use both components in Haystack pipelines.

### Using the Document Store directly

Import the Document Store:
```python
from astra_haystack.document_store import AstraDocumentStore
from haystack.document_stores import DuplicatePolicy
```

### Create a new integration

> Core integrations follow the naming convention `PREFIX-haystack`, where `PREFIX` can be the name of the technology
Expand Down
Empty file added integrations/astra/__init__.py
Empty file.
31 changes: 10 additions & 21 deletions integrations/astra/examples/example.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,19 @@
import os

# from pathlib import Path

# from haystack import Pipeline
# from haystack.components.converters import TextFileToDocument
# from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.document_stores import DuplicatePolicy

# from preprocessor import PreProcessor

from astra_store.document_store import AstraDocumentStore
from astra_store.retriever import AstraRetriever

from pathlib import Path

from haystack import Pipeline
from haystack import Document
from haystack.components.converters import TextFileToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.converters import PyPDFToDocument, TextFileToDocument
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.routers import FileTypeRouter, DocumentJoiner
from haystack.components.routers import FileTypeRouter
from haystack.components.writers import DocumentWriter
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores import DuplicatePolicy

from astra_haystack.document_store import AstraDocumentStore
from astra_haystack.retriever import AstraRetriever

HERE = Path(__file__).resolve().parent
file_paths = [HERE / "data" / Path(name) for name in os.listdir("examples/data")]
file_paths = [HERE / "data" / Path(name) for name in os.listdir("integrations/astra/examples/data")]
print(file_paths)

astra_id = os.getenv("ASTRA_DB_ID", "")
Expand Down Expand Up @@ -99,18 +88,18 @@
)

print("get_document_by_id")
print(document_store.get_document_by_id("afce9044d7f610aa28b335c4694da52248460a6a19a57f8522a7665142aa2aa7"))
print(document_store.get_document_by_id("92ef055fbae55b2b0fc79d34cbf8a80b0ad7700ca526053223b0cc6d1351df10"))
print("get_documents_by_ids")
print(
document_store.get_documents_by_id(
[
"afce9044d7f610aa28b335c4694da52248460a6a19a57f8522a7665142aa2aa7",
"92ef055fbae55b2b0fc79d34cbf8a80b0ad7700ca526053223b0cc6d1351df10",
"6f2450a51eaa3eeb9239d875402bcfe24b2d3534ff27f26c1f3fc8133b04e756",
]
)
)

document_store.delete_documents(["afce9044d7f610aa28b335c4694da52248460a6a19a57f8522a7665142aa2aa7"])
document_store.delete_documents(["92ef055fbae55b2b0fc79d34cbf8a80b0ad7700ca526053223b0cc6d1351df10"])

print("count:")
print(document_store.count_documents())
Expand Down
14 changes: 7 additions & 7 deletions integrations/astra/examples/pipeline_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@
from haystack import Document, Pipeline
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.generators import GPTGenerator
from haystack.document_stores import DuplicatePolicy
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.generators import OpenAIGenerator
from haystack.components.writers import DocumentWriter
from haystack.document_stores import DuplicatePolicy

from astra_store.document_store import AstraDocumentStore
from astra_store.retriever import AstraRetriever
from astra_haystack.document_store import AstraDocumentStore
from astra_haystack.retriever import AstraRetriever

# Create a RAG query pipeline
prompt_template = """
Given these documents, answer the question.
Documents:
{% for doc in documents[0] %}
{% for doc in documents %}
{{ doc.content }}
{% endfor %}
Expand Down Expand Up @@ -73,13 +73,13 @@
)
rag_pipeline.add_component(instance=AstraRetriever(document_store=document_store), name="retriever")
rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder")
rag_pipeline.add_component(instance=GPTGenerator(api_key=os.environ.get("OPENAI_API_KEY")), name="llm")
rag_pipeline.add_component(instance=OpenAIGenerator(api_key=os.environ.get("OPENAI_API_KEY")), name="llm")
rag_pipeline.add_component(instance=AnswerBuilder(), name="answer_builder")
rag_pipeline.connect("embedder", "retriever")
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")
rag_pipeline.connect("llm.replies", "answer_builder.replies")
rag_pipeline.connect("llm.metadata", "answer_builder.metadata")
rag_pipeline.connect("llm.meta", "answer_builder.meta")
rag_pipeline.connect("retriever", "answer_builder.documents")


Expand Down
3 changes: 3 additions & 0 deletions integrations/astra/examples/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
haystack-ai==2.0.0b4
sentence_transformers==2.2.2
openai==1.6.1
31 changes: 16 additions & 15 deletions integrations/astra/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
[build-system]
requires = ["hatchling"]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
name = "astra-store"
name = "astra-haystack"
dynamic = ["version"]
description = ''
readme = "README.md"
requires-python = ">=3.7"
license = "Apache-2.0"
keywords = []
authors = [
{ name = "John Doe", email = "[email protected]" },
{ name = "Anant Corporation", email = "[email protected]" },
]
classifiers = [
"Development Status :: 4 - Beta",
Expand All @@ -25,18 +25,22 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
# we distribute the preview version of Haystack 2.0 under the package "haystack-ai"
"haystack-ai",
"typing_extensions",
]

[project.urls]
Documentation = "https://github.com/Anant/astra-haystack/tree/main#readme"
Issues = "https://github.com/Anant/astra-haystack/issues"
Source = "https://github.com/Anant/astra-haystack"
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/astra#readme"
Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues"
Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/astra"

[tool.hatch.version]
path = "src/astra_store/__about__.py"
source = "vcs"
tag-pattern = 'integrations\/astra-v(?P<version>.*)'

[tool.hatch.version.raw-options]
root = "../.."
git_describe_command = 'git describe --tags --match="integrations/astra-v[0-9]*"'

[tool.hatch.envs.default]
dependencies = [
Expand Down Expand Up @@ -66,7 +70,7 @@ dependencies = [
"ruff>=0.0.243",
]
[tool.hatch.envs.lint.scripts]
typing = "mypy --install-types --non-interactive {args:src/astra_store tests}"
typing = "mypy --install-types --non-interactive {args:src/astra_haystack tests}"
style = [
"ruff {args:.}",
"black --check --diff {args:.}",
Expand Down Expand Up @@ -140,7 +144,7 @@ unfixable = [
]

[tool.ruff.isort]
known-first-party = ["astra_store"]
known-first-party = ["astra_haystack"]

[tool.ruff.flake8-tidy-imports]
ban-relative-imports = "all"
Expand All @@ -150,15 +154,12 @@ ban-relative-imports = "all"
"tests/**/*" = ["PLR2004", "S101", "TID252"]

[tool.coverage.run]
source_pkgs = ["astra_store", "tests"]
source_pkgs = ["astra_haystack", "tests"]
branch = true
parallel = true
omit = [
"src/astra_store/__about__.py",
]

[tool.coverage.paths]
astra_store = ["src/astra_store", "*/astra-store/src/astra_store"]
astra_haystack = ["src/astra_haystack", "*/astra-store/src/astra_haystack"]
tests = ["tests", "*/astra-store/tests"]

[tool.coverage.report]
Expand Down
6 changes: 6 additions & 0 deletions integrations/astra/src/astra_haystack/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# SPDX-FileCopyrightText: 2023-present Anant Corporation <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0
from astra_haystack.document_store import AstraDocumentStore

__all__ = ["AstraDocumentStore"]
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from pydantic.dataclasses import dataclass

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


@dataclass
Expand Down Expand Up @@ -70,7 +69,7 @@ def find_index(self):

if "status" in response_dict:
collection_name_matches = list(
filter(lambda d: d['name'] == self.collection_name, response_dict["status"]["collections"])
filter(lambda d: d["name"] == self.collection_name, response_dict["status"]["collections"])
)

if len(collection_name_matches) == 0:
Expand Down
Loading

0 comments on commit 9cb8eac

Please sign in to comment.