Skip to content

Commit

Permalink
Merge branch 'v0.1.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
MVYaroshenko committed Jun 11, 2024
2 parents a67b55f + c4b9b4a commit e61d1c6
Show file tree
Hide file tree
Showing 25 changed files with 1,001 additions and 21 deletions.
Empty file.
48 changes: 48 additions & 0 deletions programs/db/chromadb/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from chromadb import PersistentClient, Documents, Embeddings

from utca.core import AddData, ReplacingScope
from utca.implementation.tasks import TransformersTextEmbedding
from utca.implementation.datasources.db import (
ChromaDBGetOrCreateCollection,
ChromaDBCollectionAdd,
ChromaDBEmbeddingFunctionComponent,
ChromaDBCollectionQuery,
)

# Sentences for dataset
sentences = [
"People who test positive for Covid-19 no longer need to routinely stay away from others for at least five days, according to new guidelines from the US Centers for Disease Control and Prevention issued Friday.",
"The change ends a strategy from earlier in the pandemic that experts said has been important to controlling the spread of the infection.",
"Whether it be the latest prized Stanley cup or that 10-year-old plastic spout bottle you don’t go anywhere without, “emotional support water bottles” seem to be stuck to our sides and not going anywhere.",
"Health officials in Alaska recently reported the first known human death from a virus called Alaskapox.",
"Blizzard conditions continued to slam Northern California over the weekend with damaging winds and heavy snow dumping on mountain ridges down to the valleys.",
]

class EmbeddingFunction(ChromaDBEmbeddingFunctionComponent[Documents]):
def __call__(self, documents: Documents) -> Embeddings:
return embedding_pipe.run({"texts": documents})["embeddings"].tolist()

if __name__ == "__main__":
embedding_pipe = TransformersTextEmbedding()

pipe = (
AddData({
"collection_name": "test",
})
| ChromaDBGetOrCreateCollection(
client=PersistentClient(), embedding_function=EmbeddingFunction(embedding_pipe) # type: ignore
).use(get_key="collection_name")
| AddData({
"documents": sentences,
"ids": [f"id_{i}" for i in range(1, len(sentences)+1)]
})
| ChromaDBCollectionAdd()
| AddData({
"query_texts": ["Bad weather"],
"n_results": 1,
"include": ["documents", "distances"]
})
| ChromaDBCollectionQuery().use(set_key="results", replace=ReplacingScope.GLOBAL)
)

print(pipe.run()["results"])
13 changes: 13 additions & 0 deletions programs/db/graph/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Neo4j

To run this example, you can run Docker locally. Execute the following command to start the container before running the program:

``` console
sh run_neo4j_docker.sh
```

Then, run the program:

``` console
python main.py
```
Empty file added programs/db/graph/__init__.py
Empty file.
86 changes: 86 additions & 0 deletions programs/db/graph/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from __future__ import annotations
from typing import Any, Dict, cast

from neo4j import ManagedTransaction

from utca.core import While, ExecuteFunction
from utca.implementation.datasources.db import (
Neo4jClient, Neo4jWriteAction
)

employee_threshold=10

def employ_person_tx(tx: ManagedTransaction, name: str) -> str:
# Create new Person node with given name, if not exists already
result = tx.run("""
MERGE (p:Person {name: $name})
RETURN p.name AS name
""", name=name
)

# Obtain most recent organization ID and the number of people linked to it
result = tx.run("""
MATCH (o:Organization)
RETURN o.id AS id, COUNT{(p:Person)-[r:WORKS_FOR]->(o)} AS employees_n
ORDER BY o.created_date DESC
LIMIT 1
""")
org = result.single()

if org is not None and org["employees_n"] == 0:
raise Exception("Most recent organization is empty.")
# Transaction will roll back -> not even Person is created!

# If org does not have too many employees, add this Person to that
if org is not None and org.get("employees_n") < employee_threshold:
result = tx.run("""
MATCH (o:Organization {id: $org_id})
MATCH (p:Person {name: $name})
MERGE (p)-[r:WORKS_FOR]->(o)
RETURN $org_id AS id
""", org_id=org["id"], name=name
)

# Otherwise, create a new Organization and link Person to it
else:
result = tx.run("""
MATCH (p:Person {name: $name})
CREATE (o:Organization {id: randomuuid(), created_date: datetime()})
MERGE (p)-[r:WORKS_FOR]->(o)
RETURN o.id AS id
""", name=name
)

# Return the Organization ID to which the new Person ends up in
return cast(str, result.single(strict=True)["id"])

if __name__ == "__main__":
# See shell script for docker
client = Neo4jClient(
url="neo4j://localhost:7687",
user="neo4j",
password="password",
)

employee_id = 0
def employee_name(_: Any) -> Dict[str, Any]:
global employee_id
employee_id += 1
return {"kwargs": {"name": f"Thor{employee_id}"}}

def print_message(input_data: Dict[str, Any]) -> None:
print(f'User {input_data["kwargs"]["name"]} added to organization {input_data["org_id"]}')

p = While(
ExecuteFunction(employee_name)
| Neo4jWriteAction(
database="neo4j",
transaction_function=employ_person_tx,
client=client,
).use(set_key="org_id")
| ExecuteFunction(print_message),
max_iterations=100,
)
p.run()

client.close()
4 changes: 4 additions & 0 deletions programs/db/graph/run_neo4j_docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
docker run \
--publish=7474:7474 --publish=7687:7687 \
--env NEO4J_AUTH=neo4j/password \
neo4j
Empty file added programs/db/sql/__init__.py
Empty file.
3 changes: 1 addition & 2 deletions programs/db/main.py → programs/db/sql/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations
from typing import List, Optional

from typing import List
from typing import Optional
from sqlalchemy import (
ForeignKey,
String,
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "utca"
version = "0.1.0"
version = "0.1.1"
description = ""
authors = ["knowledgator.com"]
readme = "README.md"
Expand All @@ -27,6 +27,8 @@ requests-html = "^0.10.0"
reportlab = "^4.2.0"
requests = "^2.32.2"
gliner = "^0.2.2"
neo4j = "^5.20.0"
chromadb = "^0.5.0"


[build-system]
Expand Down
30 changes: 30 additions & 0 deletions src/utca/implementation/datasources/db/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,40 @@
from utca.implementation.datasources.db.sqlalchemy.main import (
SQLSessionFactory, BaseModel, SQLAction, SQLActionWithReturns
)
from utca.implementation.datasources.db.neo4j.main import (
Neo4jClient, Neo4jReadAction, Neo4jWriteAction
)
from utca.implementation.datasources.db.chroma.main import (
ChromaDBCollectionAdd,
ChromaDBCollectionUpdate,
ChromaDBCollectionUpsert,
ChromaDBGetCollection,
ChromaDBCreateCollection,
ChromaDBGetOrCreateCollection,
ChromaDBDeleteCollection,
ChromaDBCollectionGet,
ChromaDBCollectionQuery,
)
from utca.implementation.datasources.db.chroma.schema import (
ChromaDBEmbeddingFunctionComponent,
)

__all__ = [
"SQLSessionFactory",
"BaseModel",
"SQLAction",
"SQLActionWithReturns",
"Neo4jClient",
"Neo4jReadAction",
"Neo4jWriteAction",
"ChromaDBGetCollection",
"ChromaDBCreateCollection",
"ChromaDBGetOrCreateCollection",
"ChromaDBDeleteCollection",
"ChromaDBCollectionAdd",
"ChromaDBCollectionUpdate",
"ChromaDBCollectionUpsert",
"ChromaDBCollectionGet",
"ChromaDBCollectionQuery",
"ChromaDBEmbeddingFunctionComponent",
]
Empty file.
Loading

0 comments on commit e61d1c6

Please sign in to comment.