Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

community: KuzuGraph needs allow_dangerous_requests, add graph documents via LLMGraphTransformer #27949

Merged
merged 39 commits into from
Dec 10, 2024
Merged
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
cc34b35
Add graph documents via LLMGraphTransformer
prrao87 Nov 6, 2024
59be82e
Fix ruff error
prrao87 Nov 6, 2024
3d29ffe
Add new SQL Vector Store Langchain Integration
Pookam90 Nov 11, 2024
513e63b
Fixing the link
Pookam90 Nov 11, 2024
3dc7538
Update microsoft.mdx
Pookam90 Nov 11, 2024
279639e
Merge branch 'langchain-ai:master' into sqlvector-langchain-branch
Pookam90 Nov 11, 2024
ee113ec
remove outdated docs
hsm207 Nov 12, 2024
819681a
Merge branch 'master' into add_graph_document
prrao87 Nov 15, 2024
dbabe41
Update FeatureTables.js
Pookam90 Nov 18, 2024
f2994b0
Adding sqlserver vectorstore
Pookam90 Nov 18, 2024
ca91673
Adding SQLserver vector store
Pookam90 Nov 18, 2024
cff5839
Merge branch 'sqlvector-langchain-branch' of https://github.com/Pooka…
Pookam90 Nov 18, 2024
ec7fee0
Update FeatureTables.js
Pookam90 Nov 18, 2024
3aa8033
Adding SQL Server Vector store to MSFT integrations
Pookam90 Nov 18, 2024
3d41b9f
Merge branch 'sqlvector-langchain-branch' of https://github.com/Pooka…
Pookam90 Nov 18, 2024
68c6e08
Removing AzureSQL and replacing with SQLServer
Pookam90 Nov 18, 2024
090679a
Merge branch 'langchain-ai:master' into sqlvector-langchain-branch
Pookam90 Nov 18, 2024
4ee2b8f
Adding sqlserver vector store with makeformat,lint
Pookam90 Nov 18, 2024
6bbb645
adding sqlserver vector store with format
Pookam90 Nov 18, 2024
8f107d0
adding SQLserver vector store after format/lint
Pookam90 Nov 18, 2024
7add9f4
"Fixing missing headers for sqlserver vectorstore"
Pookam90 Nov 18, 2024
e269a7b
"updating sqlserver vector store documentation"
Pookam90 Nov 18, 2024
e0ab27e
"Updating sqlserver vector store with template"
Pookam90 Nov 18, 2024
3720e26
fixing typos sqlserver vectorstore
Pookam90 Nov 18, 2024
fc70b76
fixing typos
Pookam90 Nov 18, 2024
60c1aa0
Merge branch 'master' into add_graph_document
prrao87 Nov 28, 2024
7aa92a1
Merge branch 'master' into add_graph_document
prrao87 Dec 4, 2024
7bdf11c
Merge branch 'master' into sqlvector-langchain-branch
efriis Dec 9, 2024
f2a4121
merge
efriis Dec 10, 2024
2943402
x
efriis Dec 10, 2024
165cfe5
x
efriis Dec 10, 2024
976d5e2
x
efriis Dec 10, 2024
3c10676
x
efriis Dec 10, 2024
ad02be3
merge sqlvector-langchain-branch
efriis Dec 10, 2024
4176f8e
x
efriis Dec 10, 2024
122597d
merge docs-weaviate-hybrid
efriis Dec 10, 2024
a0f14d1
Merge branch 'master' into add_graph_document
efriis Dec 10, 2024
8b713ba
x
efriis Dec 10, 2024
8f52d2f
x
efriis Dec 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 170 additions & 4 deletions libs/community/langchain_community/graphs/kuzu_graph.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from typing import Any, Dict, List
from hashlib import md5
from typing import Any, Dict, List, Tuple

from langchain_community.graphs.graph_document import GraphDocument, Relationship


class KuzuGraph:
Expand All @@ -16,7 +19,19 @@ class KuzuGraph:
See https://python.langchain.com/docs/security for more information.
"""

def __init__(self, db: Any, database: str = "kuzu") -> None:
def __init__(
self, db: Any, database: str = "kuzu", allow_dangerous_requests: bool = False
) -> None:
"""Initializes the Kùzu graph database connection."""

if allow_dangerous_requests is not True:
raise ValueError(
"The KuzuGraph class is a powerful tool that can be used to execute "
"arbitrary queries on the database. To enable this functionality, "
"set the `allow_dangerous_requests` parameter to `True` when "
"constructing the KuzuGraph object."
)

try:
import kuzu
except ImportError:
Expand Down Expand Up @@ -57,7 +72,7 @@ def refresh_schema(self) -> None:
if properties[property_name]["dimension"] > 0:
if "shape" in properties[property_name]:
for s in properties[property_name]["shape"]:
list_type_flag += "[%s]" % s
list_type_flag += f"[{s}]"
else:
for i in range(properties[property_name]["dimension"]):
list_type_flag += "[]"
Expand All @@ -71,7 +86,7 @@ def refresh_schema(self) -> None:
rel_tables = self.conn._get_rel_table_names()
for table in rel_tables:
relationships.append(
"(:%s)-[:%s]->(:%s)" % (table["src"], table["name"], table["dst"])
f"(:{table['src']})-[:{table['name']}]->(:{table['dst']})"
)

rel_properties = []
Expand All @@ -93,3 +108,154 @@ def refresh_schema(self) -> None:
f"Relationships properties: {rel_properties}\n"
f"Relationships: {relationships}\n"
)

def _create_chunk_node_table(self) -> None:
self.conn.execute(
"""
CREATE NODE TABLE IF NOT EXISTS Chunk (
id STRING,
text STRING,
type STRING,
PRIMARY KEY(id)
);
"""
)

def _create_entity_node_table(self, node_label: str) -> None:
self.conn.execute(
f"""
CREATE NODE TABLE IF NOT EXISTS {node_label} (
id STRING,
type STRING,
PRIMARY KEY(id)
);
"""
)

def _create_entity_relationship_table(self, rel: Relationship) -> None:
self.conn.execute(
f"""
CREATE REL TABLE IF NOT EXISTS {rel.type} (
FROM {rel.source.type} TO {rel.target.type}
);
"""
)

def add_graph_documents(
    self,
    graph_documents: List[GraphDocument],
    allowed_relationships: List[Tuple[str, str, str]],
    include_source: bool = False,
) -> None:
    """
    Adds a list of `GraphDocument` objects that represent nodes and relationships
    in a graph to a Kùzu backend.

    Table-creation statements (the `Chunk` table, one node table per node
    label, and the `MENTIONS` relationship table group) are issued once per
    call, before any data is merged. Node and relationship type names are
    interpolated directly into the query text, so they must be valid, trusted
    identifiers; ids and text are passed as query parameters.

    Parameters:
    - graph_documents (List[GraphDocument]): A list of `GraphDocument` objects
      that contain the nodes and relationships to be added to the graph. Each
      `GraphDocument` should encapsulate the structure of part of the graph,
      including nodes, relationships, and the source document information.
      An empty list is a no-op.

    - allowed_relationships (List[Tuple[str, str, str]]): A list of
      (source node type, relationship type, target node type) tuples.
      Accepted as part of the interface, but this method does not read it:
      relationship tables are created from each relationship's own type and
      endpoint node types.

    - include_source (bool): If True, stores the source document
      and links it to nodes in the graph using the `MENTIONS` relationship.
      This is useful for tracing back the origin of data. Merges source
      documents based on the `id` property from the source document metadata
      if available; otherwise it calculates the MD5 hash of `page_content`
      and writes it back into `document.source.metadata["id"]`.
      Defaults to False.
    """
    if not graph_documents:
        # Nothing to write, so leave the schema untouched as well.
        return

    # Unique node labels in first-seen order, so the generated DDL is
    # deterministic from run to run.
    node_labels = list(
        dict.fromkeys(
            node.type for document in graph_documents for node in document.nodes
        )
    )

    # Schema setup is independent of the individual documents, so do it once
    # up front instead of once per document / per node.
    if include_source:
        self._create_chunk_node_table()
    for node_label in node_labels:
        self._create_entity_node_table(node_label)
    if include_source and node_labels:
        # One MENTIONS table group linking Chunk to every entity table, with
        # the properties shared by all member tables appended at the end.
        mention_pairs = ", ".join(
            f"FROM Chunk TO {node_label}" for node_label in node_labels
        )
        self.conn.execute(
            "CREATE REL TABLE GROUP IF NOT EXISTS MENTIONS ("
            + mention_pairs
            + ", label STRING, triplet_source_id STRING)"
        )

    # (type, source type, target type) triples whose table DDL was already
    # issued during this call.
    created_rel_tables: set = set()

    for document in graph_documents:
        if include_source:
            source_metadata = document.source.metadata
            if not source_metadata.get("id"):
                # Add a unique id to each document chunk via an md5 hash
                source_metadata["id"] = md5(
                    document.source.page_content.encode("utf-8")
                ).hexdigest()

            self.conn.execute(
                "\n"
                "MERGE (c:Chunk {id: $id})\n"
                "SET c.text = $text,\n"
                'c.type = "text_chunk"\n',
                parameters={
                    "id": source_metadata["id"],
                    "text": document.source.page_content,
                },
            )

        # Add entity nodes from data
        for node in document.nodes:
            self.conn.execute(
                f"\nMERGE (e:{node.type} {{id: $id}})\n" 'SET e.type = "entity"\n',
                parameters={"id": node.id},
            )
            if include_source:
                # Link the source chunk to the entity it mentions.
                self.conn.execute(
                    "\n"
                    "MATCH (c:Chunk {id: $id}),\n"
                    f"(e:{node.type} {{id: $node_id}})\n"
                    "MERGE (c)-[m:MENTIONS]->(e)\n"
                    "SET m.triplet_source_id = $id\n",
                    parameters={
                        "id": document.source.metadata["id"],
                        "node_id": node.id,
                    },
                )

        # Add entity relationships
        for rel in document.relationships:
            source_label = rel.source.type
            target_label = rel.target.type
            rel_key = (rel.type, source_label, target_label)
            if rel_key not in created_rel_tables:
                self._create_entity_relationship_table(rel)
                created_rel_tables.add(rel_key)

            self.conn.execute(
                "\n"
                f"MATCH (e1:{source_label} {{id: $source_id}}),\n"
                f"(e2:{target_label} {{id: $target_id}})\n"
                f"MERGE (e1)-[:{rel.type}]->(e2)\n",
                parameters={
                    "source_id": rel.source.id,
                    "target_id": rel.target.id,
                },
            )
Loading