Skip to content

Commit

Permalink
add example (deepset-ai#334)
Browse files Browse the repository at this point in the history
  • Loading branch information
anakin87 authored Feb 5, 2024
1 parent 96f6ade commit 6d18bc4
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 0 deletions.
58 changes: 58 additions & 0 deletions integrations/pgvector/examples/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Before running this example, ensure you have PostgreSQL installed with the pgvector extension.
# For a quick setup using Docker:
# docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres \
#   -e POSTGRES_DB=postgres ankane/pgvector

# Install required packages for this example, including pgvector-haystack and other libraries needed
# for Markdown conversion and embeddings generation. Use the following command:
# pip install pgvector-haystack markdown-it-py mdit_plain "sentence-transformers>=2.2.0"

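# Download some Markdown files to index. Clone the repository into the directory you run this
# script from, because the script looks for the files under the relative path neural-search-pills/pills/.
# git clone https://github.com/anakin87/neural-search-pills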

import glob

from haystack import Pipeline
from haystack.components.converters import MarkdownToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter
from haystack_integrations.components.retrievers.pgvector import PgvectorEmbeddingRetriever
from haystack_integrations.document_stores.pgvector import PgvectorDocumentStore

# Set up the PgvectorDocumentStore that both pipelines below share.
# The connection string targets the local PostgreSQL instance started by the
# Docker command in the header (user/password/database all "postgres").
document_store = PgvectorDocumentStore(
    # Where to store the documents.
    connection_string="postgresql://postgres:postgres@localhost:5432/postgres",
    table_name="haystack_test",
    recreate_table=True,
    # How embeddings are stored and searched.
    # NOTE(review): embedding_dimension presumably has to agree with the output
    # size of the model used by the embedders below — confirm if the model changes.
    embedding_dimension=768,
    vector_function="cosine_similarity",
    search_strategy="hnsw",
)

# Create the indexing Pipeline and index some documents.
# The Markdown files are expected under a path relative to the current working directory.
file_paths = glob.glob("neural-search-pills/pills/*.md")
if not file_paths:
    # glob.glob returns an empty list when nothing matches. Stop here with an
    # actionable message instead of handing the pipeline an empty source list.
    msg = (
        "No Markdown files found under 'neural-search-pills/pills/'. "
        "Clone https://github.com/anakin87/neural-search-pills into the current directory first."
    )
    raise SystemExit(msg)

# Data flow: converter -> splitter -> embedder -> writer.
indexing = Pipeline()
indexing.add_component("converter", MarkdownToDocument())
# Split by sentence, with a split length of 2.
indexing.add_component("splitter", DocumentSplitter(split_by="sentence", split_length=2))
indexing.add_component("embedder", SentenceTransformersDocumentEmbedder())
# The writer stores its input in the document store created above.
indexing.add_component("writer", DocumentWriter(document_store))
indexing.connect("converter", "splitter")
indexing.connect("splitter", "embedder")
indexing.connect("embedder", "writer")

indexing.run({"converter": {"sources": file_paths}})

# Build the query-side Pipeline: the text embedder feeds the pgvector retriever,
# which reads from the same document store the indexing pipeline wrote to.
text_embedder = SentenceTransformersTextEmbedder()
retriever = PgvectorEmbeddingRetriever(document_store=document_store, top_k=3)

querying = Pipeline()
querying.add_component("embedder", text_embedder)
querying.add_component("retriever", retriever)
querying.connect("embedder", "retriever")

# Run a sample query through the pipeline.
query_inputs = {"embedder": {"text": "What is a cross-encoder?"}}
results = querying.run(query_inputs)

# Print each retrieved document followed by a separator line.
retrieved_documents = results["retriever"]["documents"]
for doc in retrieved_documents:
    print(doc, "-" * 10, sep="\n")
2 changes: 2 additions & 0 deletions integrations/pgvector/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ ban-relative-imports = "parents"
[tool.ruff.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252"]
# Examples can contain "print" calls
"examples/**/*" = ["T201"]

[tool.coverage.run]
source_pkgs = ["src", "tests"]
Expand Down

0 comments on commit 6d18bc4

Please sign in to comment.