Skip to content

Commit

Permalink
rename retriever
Browse files Browse the repository at this point in the history
  • Loading branch information
anakin87 committed Feb 12, 2024
1 parent d02286c commit c14a054
Show file tree
Hide file tree
Showing 8 changed files with 90 additions and 1,323 deletions.
50 changes: 50 additions & 0 deletions integrations/pinecone/examples/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Install the Pinecone integration; Haystack will be installed as a dependency.
# Also install some optional dependencies needed for Markdown conversion and text embedding:
# pip install -U pinecone-haystack markdown-it-py mdit_plain "sentence-transformers>=2.2.0"

# Download some markdown files to index
# git clone https://github.com/anakin87/neural-search-pills


# Create the indexing Pipeline and index some documents

import glob

from haystack import Pipeline
from haystack.components.converters import MarkdownToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter
from pinecone_haystack import PineconeDocumentStore
from pinecone_haystack.dense_retriever import PineconeEmbeddingRetriever

# Collect every Markdown file from the cloned repository.
file_paths = glob.glob("neural-search-pills/pills/*.md")

# Replace the placeholder API key with a real one before running.
# NOTE(review): `dimension` presumably has to match the size of the vectors
# produced by the embedders used below; confirm for the chosen model.
document_store = PineconeDocumentStore(
    api_key="YOUR-PINECONE-API-KEY",
    environment="gcp-starter",
    index="default",
    namespace="default",
    dimension=768,
)

# Indexing flow: converter -> splitter -> embedder -> writer, with the writer
# storing the results in `document_store`.
indexing = Pipeline()

# Register each component under the name used for wiring below.
for component_name, component in (
    ("converter", MarkdownToDocument()),
    ("splitter", DocumentSplitter(split_by="sentence", split_length=2)),
    ("embedder", SentenceTransformersDocumentEmbedder()),
    ("writer", DocumentWriter(document_store)),
):
    indexing.add_component(component_name, component)

# Chain the components in processing order.
for upstream, downstream in (
    ("converter", "splitter"),
    ("splitter", "embedder"),
    ("embedder", "writer"),
):
    indexing.connect(upstream, downstream)

# Hand the Markdown paths to the converter to run the whole chain.
indexing.run({"converter": {"sources": file_paths}})


# Create the querying Pipeline and try a query

# Query flow: the text embedder's output is connected to the retriever, which
# is asked for the top 3 documents from `document_store`.
querying = Pipeline()
querying.add_component("embedder", SentenceTransformersTextEmbedder())
querying.add_component("retriever", PineconeEmbeddingRetriever(document_store=document_store, top_k=3))
querying.connect("embedder", "retriever")

results = querying.run({"embedder": {"text": "What is Question Answering?"}})

# Print each retrieved document followed by a separator line. The loop body
# must be indented; without it the script fails with an IndentationError.
for doc in results["retriever"]["documents"]:
    print(doc)
    print("-" * 10)
Loading

0 comments on commit c14a054

Please sign in to comment.