Added E2E tests, new GitHub workflow, and separated out unit tests

Setup neo4j db for e2e tests
neo4j · May 2, 2024 · 0eaa5aa · 0eaa5aa
1 parent e98a2d1
commit 0eaa5aa
Show file tree

Hide file tree

Showing 22 changed files with 479 additions and 28 deletions.
diff --git a/.github/workflows/pr-e2e-tests.yaml b/.github/workflows/pr-e2e-tests.yaml
@@ -0,0 +1,59 @@
+name: 'Neo4j-GenAI PR E2E Tests'
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+
+# Cancel any in-flight run for the same PR when new commits are pushed.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  e2e-tests:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        neo4j-version:
+          - 5
+        neo4j-edition:
+          - community
+          - enterprise
+    services:
+      neo4j:
+        image: neo4j:${{ matrix.neo4j-version }}-${{ matrix.neo4j-edition }}
+        env:
+          NEO4J_AUTH: neo4j/password
+          # Quoted so the value is always the literal string "yes".
+          NEO4J_ACCEPT_LICENSE_AGREEMENT: 'yes'
+        ports:
+          - 7687:7687
+          - 7474:7474
+        # Hold the job's steps until the database answers a query.
+        options: >-
+          --health-cmd "cypher-shell -u neo4j -p password 'RETURN 1'"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 12
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+
+      # GITHUB_PATH additions only apply to later steps, so extend PATH here.
+      - name: Install Poetry
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
+
+      - name: Configure Poetry
+        run: poetry config virtualenvs.create false
+
+      - name: Install dependencies
+        run: poetry install
+
+      - name: Run tests
+        run: poetry run pytest ./tests/e2e
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -28,7 +28,7 @@ jobs:
         run: |
           poetry run ruff format --check .
           poetry run ruff check .
-      - name: Run tests and check coverage
+      - name: Run unit tests and check coverage
         run: |
-          poetry run coverage run -m pytest
+          poetry run coverage run -m pytest tests/unit
           poetry run coverage report --fail-under=90
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,4 @@ dist/
 htmlcov/
 .idea/
 .env
+docs/build/
diff --git a/README.md b/README.md
@@ -80,7 +80,7 @@ create_vector_index(
 
 ### Populating the Neo4j Vector Index
 
-This library does not write to the database, that is up to you.  
+This library does not write to the database, that is up to you.
 See below for how to write using Cypher via the Neo4j driver.
 
 Assumption: Neo4j running with a defined vector index
@@ -161,7 +161,7 @@ Open a new virtual environment and then run the tests.
 
 ```bash
 poetry shell
-pytest
+pytest tests/unit
 ```
 
 ## Further information

diff --git a/examples/hybrid_cypher_search.py b/examples/hybrid_cypher_search.py
@@ -58,5 +58,5 @@ def embed_query(self, text: str) -> list[float]:
 driver.execute_query(insert_query, parameters)
 
 # Perform the similarity search for a text query
-query_text = "Who are the fremen?"
+query_text = "Find me a book about Fremen"
 print(retriever.search(query_text=query_text, top_k=5))
diff --git a/examples/hybrid_search.py b/examples/hybrid_search.py
@@ -55,5 +55,5 @@ def embed_query(self, text: str) -> list[float]:
 driver.execute_query(insert_query, parameters)
 
 # Perform the similarity search for a text query
-query_text = "Who are the fremen?"
+query_text = "Find me a book about Fremen"
 print(retriever.search(query_text=query_text, top_k=5))
diff --git a/examples/openai_search.py b/examples/openai_search.py
@@ -48,5 +48,5 @@
 driver.execute_query(insert_query, parameters)
 
 # Perform the similarity search for a text query
-query_text = "hello world"
+query_text = "Find me a book about Fremen"
 print(retriever.search(query_text=query_text, top_k=5))
diff --git a/examples/similarity_search_for_text.py b/examples/similarity_search_for_text.py
@@ -51,5 +51,5 @@ def embed_query(self, text: str) -> list[float]:
 driver.execute_query(insert_query, parameters)
 
 # Perform the similarity search for a text query
-query_text = "hello world"
+query_text = "Find me a book about Fremen"
 print(retriever.search(query_text=query_text, top_k=5))
diff --git a/examples/vector_cypher_retrieval.py b/examples/vector_cypher_retrieval.py
@@ -63,5 +63,5 @@ def random_str(n: int) -> str:
 driver.execute_query(insert_query, parameters)
 
 # Perform the search
-query_text = "Find me the closest text"
+query_text = "Find me a book about Fremen"
 print(retriever.search(query_text=query_text, top_k=1))
diff --git a/src/neo4j_genai/indexes.py b/src/neo4j_genai/indexes.py
@@ -115,7 +115,7 @@ def drop_index(driver: Driver, name: str) -> None:
         driver (Driver): Neo4j Python driver instance.
         name (str): The name of the index to delete.
     """
-    query = "DROP INDEX $name"
+    query = "DROP INDEX $name IF EXISTS"
     parameters = {
         "name": name,
     }

diff --git a/src/neo4j_genai/neo4j_queries.py b/src/neo4j_genai/neo4j_queries.py
@@ -25,6 +25,7 @@ def get_search_query(
     query_map = {
         SearchType.VECTOR: (
             "CALL db.index.vector.queryNodes($index_name, $top_k, $query_vector) "
+            "YIELD node, score "
         ),
         SearchType.HYBRID: (
             "CALL { "
@@ -44,13 +45,27 @@ def get_search_query(
     base_query = query_map[search_type]
     additional_query = ""
 
-    if retrieval_query:
-        additional_query += retrieval_query
-    elif return_properties:
-        return_properties_cypher = ", ".join([f".{prop}" for prop in return_properties])
-        additional_query += "YIELD node, score "
-        additional_query += f"RETURN node {{{return_properties_cypher}}} as node, score"
-    else:
-        additional_query += "RETURN node, score"
+    if search_type == SearchType.VECTOR:
+        if retrieval_query:
+            additional_query += retrieval_query
+        elif return_properties:
+            return_properties_cypher = ", ".join(
+                [f".{prop}" for prop in return_properties]
+            )
+            additional_query += (
+                f"RETURN node {{{return_properties_cypher}}} as node, score"
+            )
+    elif search_type == SearchType.HYBRID:
+        if retrieval_query:
+            additional_query += retrieval_query
+        elif return_properties:
+            return_properties_cypher = ", ".join(
+                [f".{prop}" for prop in return_properties]
+            )
+            additional_query += (
+                f"RETURN node {{{return_properties_cypher}}} as node, score"
+            )
+        else:
+            additional_query += "RETURN node, score"
 
     return base_query + additional_query
diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
@@ -0,0 +1,90 @@
+#  Copyright (c) "Neo4j"
+#  Neo4j Sweden AB [https://neo4j.com]
+#  #
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#  #
+#      https://www.apache.org/licenses/LICENSE-2.0
+#  #
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+import string
+import random
+import uuid
+
+import pytest
+from neo4j import GraphDatabase
+from neo4j_genai.embedder import Embedder
+from neo4j_genai.indexes import drop_index, create_vector_index, create_fulltext_index
+
+
+@pytest.fixture(scope="module")
+def driver():
+    """Yield a Neo4j driver for the local e2e database.
+
+    The driver is created once per test module and closed after the
+    module's tests have finished.
+    """
+    connection = GraphDatabase.driver(
+        "neo4j://localhost:7687",
+        auth=("neo4j", "password"),
+    )
+    yield connection
+    connection.close()
+
+
+@pytest.fixture(scope="module")
+def custom_embedder():
+    """Provide an embedder stub that returns a random 1536-element vector."""
+    dimensions = 1536
+
+    class CustomEmbedder(Embedder):
+        def embed_query(self, text: str) -> list[float]:
+            # The query text is ignored; only the vector length matters here.
+            return [random.random() for _ in range(dimensions)]
+
+    return CustomEmbedder()
+
+
+@pytest.fixture(scope="module")
+def setup_neo4j(driver):
+    """Reset the database and seed it with indexes and sample documents.
+
+    Deletes every node, drops and recreates the vector and fulltext indexes,
+    then inserts 10 ``Document`` nodes, each linked to an ``Author`` node
+    with a random name.
+
+    Args:
+        driver: Neo4j driver supplied by the ``driver`` fixture.
+    """
+    vector_index_name = "vector-index-name"
+    fulltext_index_name = "fulltext-index-name"
+
+    # Delete data and drop indexes to prevent data leakage
+    driver.execute_query("MATCH (n) DETACH DELETE n")
+    drop_index(driver, vector_index_name)
+    drop_index(driver, fulltext_index_name)
+
+    # Create a vector index
+    create_vector_index(
+        driver,
+        vector_index_name,
+        label="Document",
+        property="propertyKey",
+        dimensions=1536,
+        similarity_fn="euclidean",
+    )
+
+    # Create a fulltext index
+    # NOTE(review): this covers the same property that stores the vector
+    # below - confirm that is intended.
+    create_fulltext_index(
+        driver, fulltext_index_name, label="Document", node_properties=["propertyKey"]
+    )
+
+    # Every document shares this one random vector.
+    vector = [random.random() for _ in range(1536)]
+
+    def random_str(n: int) -> str:
+        return "".join(random.choice(string.ascii_letters) for _ in range(n))
+
+    # The statement is the same for every row, so build it once. Each clause
+    # ends with a space so the concatenated Cypher keeps its separators.
+    insert_query = (
+        "MERGE (doc:Document {id: $id}) "
+        "WITH doc "
+        "CALL db.create.setNodeVectorProperty(doc, 'propertyKey', $vector) "
+        "WITH doc "
+        "MERGE (author:Author {name: $authorName}) "
+        "MERGE (doc)-[:AUTHORED_BY]->(author) "
+        "RETURN doc, author"
+    )
+
+    # Insert 10 documents and authors
+    for _ in range(10):
+        parameters = {
+            "id": str(uuid.uuid4()),
+            "vector": vector,
+            "authorName": random_str(10),
+        }
+        driver.execute_query(insert_query, parameters)
diff --git a/tests/e2e/test_hybrid_e2e.py b/tests/e2e/test_hybrid_e2e.py
@@ -0,0 +1,132 @@
+#  Copyright (c) "Neo4j"
+#  Neo4j Sweden AB [https://neo4j.com]
+#  #
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#  #
+#      https://www.apache.org/licenses/LICENSE-2.0
+#  #
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+
+import pytest
+
+from neo4j import Record
+
+from neo4j_genai import (
+    HybridRetriever,
+    HybridCypherRetriever,
+)
+
+
+@pytest.mark.usefixtures("setup_neo4j")
+def test_hybrid_retriever_search_text(driver, custom_embedder):
+    """Search by text with a custom embedder and expect five Record results."""
+    top_k = 5
+    hybrid = HybridRetriever(
+        driver,
+        "vector-index-name",
+        "fulltext-index-name",
+        custom_embedder,
+    )
+
+    records = hybrid.search(query_text="Find me a book about Fremen", top_k=top_k)
+
+    assert isinstance(records, list)
+    assert len(records) == top_k
+    for record in records:
+        assert isinstance(record, Record)
+
+
+@pytest.mark.usefixtures("setup_neo4j")
+def test_hybrid_cypher_retriever_search_text(driver, custom_embedder):
+    """Text search with a retrieval query yields five author-name records."""
+    top_k = 5
+    # Custom Cypher handed to the retriever as its retrieval query.
+    author_lookup = "MATCH (node)-[:AUTHORED_BY]->(author:Author) RETURN author.name"
+    hybrid = HybridCypherRetriever(
+        driver,
+        "vector-index-name",
+        "fulltext-index-name",
+        author_lookup,
+        custom_embedder,
+    )
+
+    records = hybrid.search(query_text="Find me a book about Fremen", top_k=top_k)
+
+    assert isinstance(records, list)
+    assert len(records) == top_k
+    for row in records:
+        assert isinstance(row, Record)
+        assert "author.name" in row.keys()
+
+
+@pytest.mark.usefixtures("setup_neo4j")
+def test_hybrid_retriever_search_vector(driver):
+    """Search with an explicit query vector and expect five Record results."""
+    top_k = 5
+    # No embedder is configured, so the query vector is supplied directly.
+    unit_vector = [1.0] * 1536
+    hybrid = HybridRetriever(driver, "vector-index-name", "fulltext-index-name")
+
+    records = hybrid.search(
+        query_text="Find me a book about Fremen",
+        query_vector=unit_vector,
+        top_k=top_k,
+    )
+
+    assert isinstance(records, list)
+    assert len(records) == top_k
+    for record in records:
+        assert isinstance(record, Record)
+
+
+@pytest.mark.usefixtures("setup_neo4j")
+def test_hybrid_cypher_retriever_search_vector(driver):
+    """Vector search with a retrieval query yields five author-name records."""
+    top_k = 5
+    unit_vector = [1.0] * 1536
+    # Custom Cypher handed to the retriever as its retrieval query.
+    author_lookup = "MATCH (node)-[:AUTHORED_BY]->(author:Author) RETURN author.name"
+    hybrid = HybridCypherRetriever(
+        driver,
+        "vector-index-name",
+        "fulltext-index-name",
+        author_lookup,
+    )
+
+    records = hybrid.search(
+        query_text="Find me a book about Fremen",
+        query_vector=unit_vector,
+        top_k=top_k,
+    )
+
+    assert isinstance(records, list)
+    assert len(records) == top_k
+    for row in records:
+        assert isinstance(row, Record)
+        assert "author.name" in row.keys()
+
+
+@pytest.mark.usefixtures("setup_neo4j")
+def test_hybrid_retriever_return_properties(driver):
+    """Search with ``return_properties`` set and expect five Record results."""
+    top_k = 5
+    unit_vector = [1.0] * 1536
+    hybrid = HybridRetriever(
+        driver,
+        "vector-index-name",
+        "fulltext-index-name",
+        return_properties=["name", "age"],
+    )
+
+    records = hybrid.search(
+        query_text="Find me a book about Fremen",
+        query_vector=unit_vector,
+        top_k=top_k,
+    )
+
+    assert isinstance(records, list)
+    assert len(records) == top_k
+    for record in records:
+        assert isinstance(record, Record)
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,3 +6,4 @@ dist/ @@
     htmlcov/
     .idea/
     .env
+    docs/build/