Skip to content

Commit

Permalink
Merge pull request deepset-ai#32 from Anant/zero_embeddings
Browse files (browse the repository at this point in the history)
Zero embeddings and examples fixes
  • Loading branch information
ElenaKusevska authored Dec 7, 2023
2 parents 06cc557 + bc0a9e0 commit 7fcfe33
Show file tree
Hide file tree
Showing 4 changed files with 913 additions and 23 deletions.
38 changes: 22 additions & 16 deletions examples/example.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
import os
from pathlib import Path

from haystack.preview import Document
from haystack.preview import Pipeline
from haystack.preview.components.file_converters import TextFileToDocument
from haystack.preview.components.writers import DocumentWriter
Expand All @@ -10,6 +11,8 @@
from astra_store.document_store import AstraDocumentStore
from astra_store.retriever import AstraRetriever

from preprocessor import PreProcessor

HERE = Path(__file__).resolve().parent
file_paths = [HERE / "data" / Path(name) for name in os.listdir("examples/data")]

Expand All @@ -31,17 +34,14 @@
embedding_dim=384,
)

document_store.delete_documents(delete_all=True)
print("count:")
print(document_store.count_documents())
add_example_data(document_store, "examples/data")

# indexing = Pipeline()
# indexing.add_component("converter", TextFileToDocument())
# indexing.add_component("writer", DocumentWriter(document_store))
# indexing.connect("converter", "writer")
# print("Indexing data...")
# indexing.run({"converter": {"paths": file_paths}})
indexing = Pipeline()
indexing.add_component("converter", TextFileToDocument())
converter_results = indexing.run({"converter": {"paths": file_paths}})
preprocessor = PreProcessor(split_by="word", split_length=200, split_overlap=0, split_respect_sentence_boundary=True)
docs_processed = preprocessor.process(converter_results["converter"]["documents"])
document_store.write_documents(docs_processed, embed=True)


querying = Pipeline()
querying.add_component("retriever", AstraRetriever(document_store))
Expand All @@ -55,21 +55,27 @@
assert document_store.count_documents() == 6

print("filter:")
print(document_store.filter_documents({"content_type": "text"}))
print(document_store.filter_documents({"mime_type": "text/plain"}))


print("search without filter")
print(document_store.search(["Is black and white text boring?"], 3))
print("search with filter")
print(document_store.search(["Is black and white text boring?"], 3, {"content_type": "text"}))
print(document_store.search(["Is black and white text boring?"], 3, {"mime_type": "text/plain"}))

print("get_document_by_id and embeddings *********")
print(document_store.get_document_by_id("539fb0d47917e832bbc661e55edb8b90"))
print(document_store.get_document_by_id("92e095d5bfd66e31bb099de89bab9101474660904818fa428a8b889996c14a62"))
print("get_documents_by_ids and embeddings *********")
print(document_store.get_documents_by_id(["23dc6bb45225bade2764d856a0e1a6b3"]))
print(
document_store.get_documents_by_id(
[
"1df1b4b0b21e4015cf1d0976db1185b81fe9d3c07b630d6abac582ccd2b38a37",
"9f11bd49e9ca4f895ac3062f01ae7332ae3e1cabd13a42ff8db41d6e83fd6479",
]
)
)

document_store.delete_documents(["6f387cf0786d48d3768d605b44108241"])
# document_store.delete_documents(delete_all=True)
document_store.delete_documents(["7830332ffa979794b03cdaa6d3660bc0aa44f463014da9746ba7f3b987641967"])

print("count:")
print(document_store.count_documents())
Expand Down
8 changes: 4 additions & 4 deletions examples/pipeline_example.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
import os

from haystack import Document
from haystack.preview import Document
from haystack.preview import Pipeline
from haystack.preview.components.builders.answer_builder import AnswerBuilder
from haystack.preview.components.builders.prompt_builder import PromptBuilder
Expand Down Expand Up @@ -55,12 +55,12 @@

# Add Documents
documents = [
Document("There are over 7,000 languages spoken around the world today."),
Document(text="There are over 7,000 languages spoken around the world today."),
Document(
"Elephants have been observed to behave in a way that indicates a high level of self-awareness, such as recognizing themselves in mirrors."
text="Elephants have been observed to behave in a way that indicates a high level of self-awareness, such as recognizing themselves in mirrors."
),
Document(
"In certain parts of the world, like the Maldives, Puerto Rico, and San Diego, you can witness the phenomenon of bioluminescent waves."
text="In certain parts of the world, like the Maldives, Puerto Rico, and San Diego, you can witness the phenomenon of bioluminescent waves."
),
]
rag_pipeline.get_component("retriever").document_store.write_documents(documents)
Expand Down
Loading

0 comments on commit 7fcfe33

Please sign in to comment.