From 53aa093a41f67028722a8e294fecab6a99279744 Mon Sep 17 00:00:00 2001
From: Will Tai
Date: Fri, 19 Apr 2024 13:44:13 +0100
Subject: [PATCH 1/2] Added example for VectorCypherRetriever

---
 examples/vector_cypher_retrieval.py | 67 +++++++++++++++++++++++++++++
 src/neo4j_genai/retrievers.py       |  2 +-
 2 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 examples/vector_cypher_retrieval.py

diff --git a/examples/vector_cypher_retrieval.py b/examples/vector_cypher_retrieval.py
new file mode 100644
index 00000000..8ca829e2
--- /dev/null
+++ b/examples/vector_cypher_retrieval.py
@@ -0,0 +1,67 @@
+import random
+import string
+
+from neo4j import GraphDatabase
+from neo4j_genai import VectorCypherRetriever
+from neo4j_genai.embedder import Embedder
+from neo4j_genai.indexes import create_vector_index
+
+
+URI = "neo4j://localhost:7687"
+AUTH = ("neo4j", "password")
+
+INDEX_NAME = "embedding-name"
+DIMENSION = 1536
+
+# Connect to Neo4j database
+driver = GraphDatabase.driver(URI, auth=AUTH)
+
+
+# Placeholder embedder: returns random vectors instead of calling a real model
+class CustomEmbedder(Embedder):
+    def embed_query(self, text: str) -> list[float]:
+        return [random.random() for _ in range(DIMENSION)]
+
+
+# Generate a random string of n ASCII letters
+def random_str(n: int) -> str:
+    return "".join(random.choices(string.ascii_letters, k=n))
+
+
+embedder = CustomEmbedder()
+
+# Creating the index
+create_vector_index(
+    driver,
+    INDEX_NAME,
+    label="Document",
+    property="propertyKey",
+    dimensions=DIMENSION,
+    similarity_fn="euclidean",
+)
+
+# Initialize the retriever
+retrieval_query = "MATCH (node)-[:AUTHORED_BY]->(author:Author) RETURN author.name"
+retriever = VectorCypherRetriever(driver, INDEX_NAME, retrieval_query, embedder)
+
+# Upsert a document (with its embedding) and its author
+vector = [random.random() for _ in range(DIMENSION)]
+insert_query = (
+    "MERGE (doc:Document {id: $id}) "
+    "WITH doc "
+    "CALL db.create.setNodeVectorProperty(doc, 'propertyKey', $vector) "
+    "WITH doc "
+    "MERGE (author:Author {name: $authorName}) "
+    "MERGE (doc)-[:AUTHORED_BY]->(author) "
+    "RETURN doc, author"
+)
+parameters = {
+    "id": random.randint(0, 10000),
+    "vector": vector,
+    "authorName": random_str(10),
+}
+driver.execute_query(insert_query, parameters)
+
+# Perform the search, then release the driver's connections
+print(retriever.search(query_text="Find me the closest text", top_k=1))
+driver.close()
diff --git a/src/neo4j_genai/retrievers.py b/src/neo4j_genai/retrievers.py
index 2653ba66..f9621bdd 100644
--- a/src/neo4j_genai/retrievers.py
+++ b/src/neo4j_genai/retrievers.py
@@ -167,7 +167,7 @@ def search(
             ValueError: If no embedder is provided.
 
         Returns:
-            Any: The results of the search query
+            list[Neo4jRecord]: The results of the search query
         """
         try:
             validated_data = VectorCypherSearchModel(

From dda1d81560483496b1c463ea7734a141684e9386 Mon Sep 17 00:00:00 2001
From: Will Tai
Date: Mon, 22 Apr 2024 10:25:34 +0100
Subject: [PATCH 2/2] Adds query prefix to docstring

---
 src/neo4j_genai/retrievers.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/neo4j_genai/retrievers.py b/src/neo4j_genai/retrievers.py
index f9621bdd..6f1c8f45 100644
--- a/src/neo4j_genai/retrievers.py
+++ b/src/neo4j_genai/retrievers.py
@@ -127,7 +127,14 @@ def search(
 
 class VectorCypherRetriever(VectorRetriever):
     """
-    Provides retrieval method using vector similarity and custom Cypher query
+    Provides retrieval method using vector similarity and custom Cypher query.
+    The custom query runs after the query prefix shown below, so it can refer
+    to the `node` variable yielded by that prefix:
+    ```
+    CALL db.index.vector.queryNodes($index_name, $top_k, $query_vector)
+    YIELD node, score
+    ```
+
     """
 
     def __init__(