googleapis · vishwarajanand · Dec 17, 2024 · Dec 17, 2024 · Dec 18, 2024 · Dec 19, 2024
diff --git a/samples/migrations/snippets/alloydb_snippets.py b/samples/migrations/snippets/alloydb_snippets.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python
+
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import sys
+import uuid
+from typing import Optional
+
+# [START langchain_alloydb_get_client]
+from langchain_google_alloydb_pg import AlloyDBEngine
+
+
+async def aget_client(
+    project_id: str,
+    region: str,
+    cluster: str,
+    instance: str,
+    database: str,
+    user: Optional[str] = None,
+    password: Optional[str] = None,
+) -> AlloyDBEngine:
+    """Create an AlloyDBEngine for one database on an AlloyDB instance."""
+    alloydb_engine = await AlloyDBEngine.afrom_instance(
+        project_id=project_id,
+        region=region,
+        cluster=cluster,
+        instance=instance,
+        database=database,
+        user=user,
+        password=password,
+    )
+    print("Langchain AlloyDB client initiated.")
+    return alloydb_engine
+
+
+# [END langchain_alloydb_get_client]
+
+# [START langchain_alloydb_fake_embedding_service]
+from langchain_core.embeddings import FakeEmbeddings
+
+
+def get_embeddings_service(size: int) -> FakeEmbeddings:
+    """Return a FakeEmbeddings service configured with the given `size`."""
+    fake_embeddings = FakeEmbeddings(size=size)
+    print("Langchain FakeEmbeddings service initiated.")
+    return fake_embeddings
+
+
+# [END langchain_alloydb_fake_embedding_service]
+
+
+# [START langchain_create_alloydb_vector_store_table]
+async def ainit_vector_store(
+    engine: AlloyDBEngine, table_name: str, vector_size: int, **kwargs: dict
+) -> None:
+    """Initialize the vector store table, forwarding any extra kwargs."""
+    # overwrite_existing=True is always sent along with the caller's kwargs.
+    table_options = dict(overwrite_existing=True, **kwargs)
+    await engine.ainit_vectorstore_table(
+        table_name=table_name, vector_size=vector_size, **table_options
+    )
+
+    print("Langchain AlloyDB vector store table initialized.")
+
+
+# [END langchain_create_alloydb_vector_store_table]
+
+
+# [START langchain_get_alloydb_vector_store]
+from langchain_core.embeddings import Embeddings
+
+from langchain_google_alloydb_pg import AlloyDBVectorStore
+
+
+async def aget_vector_store(
+    engine: AlloyDBEngine, embeddings_service: Embeddings, table_name: str
+) -> AlloyDBVectorStore:
+    """Create an AlloyDBVectorStore over `table_name` using the given engine."""
+    store = await AlloyDBVectorStore.create(
+        engine=engine,
+        embedding_service=embeddings_service,
+        table_name=table_name,
+    )
+    print("Langchain AlloyDB vector store instantiated.")
+    return store
+
+
+# [END langchain_get_alloydb_vector_store]
+
+
+# [START langchain_alloydb_vector_store_insert_data]
+async def ainsert_data(
+    vector_store: AlloyDBVectorStore,
+    texts: list[str],
+    embeddings: list[list[float]],
+    metadatas: list[dict],
+    ids: list[str],
+) -> list[str]:
+    """Insert texts with their pre-computed embeddings into the vector store.
+
+    Returns whatever ids ``aadd_embeddings`` reports for the inserted rows.
+    """
+    inserted_ids = await vector_store.aadd_embeddings(
+        texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids
+    )
+    print("AlloyDB client inserted the provided data.")
+    return inserted_ids
+
+
+# [END langchain_alloydb_vector_store_insert_data]
+
+
+async def main() -> None:
+    """Run the AlloyDB snippets end to end using command-line arguments.
+
+    Expects eight CLI arguments: project_id, region, cluster, instance,
+    database, user, password and table_name.
+    """
+    client = await aget_client(
+        project_id=sys.argv[1],
+        region=sys.argv[2],
+        cluster=sys.argv[3],
+        instance=sys.argv[4],
+        database=sys.argv[5],
+        user=sys.argv[6],
+        password=sys.argv[7],
+    )
+    # In case you're using a different embeddings service, choose one from [LangChain's Embedding models](https://python.langchain.com/v0.2/docs/integrations/text_embedding/).
+    embeddings_service = get_embeddings_service(size=768)
+    await ainit_vector_store(engine=client, table_name=sys.argv[8], vector_size=768)
+    vs = await aget_vector_store(
+        engine=client, embeddings_service=embeddings_service, table_name=sys.argv[8]
+    )
+    # sample rows
+    ids = [str(uuid.uuid4())]
+    contents = ["content_1"]
+    embeddings = embeddings_service.embed_documents(contents)
+    metadatas = [{} for _ in contents]
+    # ainsert_data names its text parameter `texts`, not `contents`.
+    ids = await ainsert_data(
+        vector_store=vs,
+        ids=ids,
+        texts=contents,
+        embeddings=embeddings,
+        metadatas=metadatas,
+    )
+    await client.close()
+    print(f"Inserted {len(ids)} values to Langchain Alloy DB Vector Store.")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())  # run the async main() coroutine to completion
diff --git a/samples/migrations/snippets/pinecone_snippets.py b/samples/migrations/snippets/pinecone_snippets.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+
+# [START pinecone_get_client]
+from pinecone import Index, Pinecone, ServerlessSpec
+
+
+def get_client(pinecone_api_key: str) -> Pinecone:
+    """Build a Pinecone client from the given API key."""
+    # The serverless spec (aws / us-east-1) is handed to the constructor as-is.
+    serverless = ServerlessSpec(cloud="aws", region="us-east-1")
+    pinecone_client = Pinecone(api_key=pinecone_api_key, spec=serverless)
+
+    print("Pinecone client initiated.")
+    return pinecone_client
+
+
+# [END pinecone_get_client]
+
+# [START pinecone_get_index]
+from pinecone import Pinecone, ServerlessSpec
+
+
+def get_index(client: Pinecone, index_name: str = "index-name") -> Index:
+    """Return the client's handle for the index called `index_name`."""
+    index_ref = client.Index(index_name)
+    print("Pinecone index reference initiated.")
+    return index_ref
+
+
+# [END pinecone_get_index]
+
+
+# [START pinecone_get_all_ids]
+def get_all_ids(index: Index, namespace="") -> list[str]:
+    """Return every vector id in `namespace`, following pagination tokens."""
+    query = {"prefix": "", "namespace": namespace}  # shared by every page request
+    results = index.list_paginated(**query)
+    ids = [v.id for v in results.vectors]
+    while results.pagination is not None:
+        query["pagination_token"] = results.pagination.next
+        results = index.list_paginated(**query)
+        ids.extend(v.id for v in results.vectors)
+    print("Pinecone client fetched all ids from index.")
+    return ids
+
+
+# [END pinecone_get_all_ids]
+
+
+# [START pinecone_get_all_data]
+def get_all_data(
+    index: Index, ids: list[str]
+) -> tuple[list[str], list[str], list[list[float]], list[dict]]:
+    """Fetch `ids` from the index and unpack the response into parallel lists.
+
+    Returns (ids, contents, embeddings, metadatas); each content entry is the
+    str() of that vector's metadata.
+    """
+    # All four lists follow the iteration order of the response's "vectors" map.
+    docs = list(index.fetch(ids=ids)["vectors"].values())
+    fetched_ids = [doc["id"] for doc in docs]
+    embeddings = [doc["values"] for doc in docs]
+    metadatas = [doc["metadata"] for doc in docs]
+    contents = [str(metadata) for metadata in metadatas]
+
+    print("Pinecone client fetched all data from index.")
+    return fetched_ids, contents, embeddings, metadatas
+
+
+# [END pinecone_get_all_data]
+
+
+if __name__ == "__main__":
+    # Usage: python pinecone_snippets.py <pinecone_api_key> <index_name>
+    client = get_client(pinecone_api_key=sys.argv[1])
+    index = get_index(client=client, index_name=sys.argv[2])
+
+    # List every id in the index, then fetch the stored vectors for them.
+    ids = get_all_ids(index=index)
+    ids, content, embeddings, metadatas = get_all_data(
+        index=index,
+        ids=ids,
+    )
+
+    print(f"Downloaded {len(ids)} values from Pinecone.")
diff --git a/samples/migrations/snippets/requirements-test.txt b/samples/migrations/snippets/requirements-test.txt
@@ -0,0 +1,2 @@
+pytest==8.3.3
+pytest-asyncio==0.24.0
diff --git a/samples/migrations/snippets/requirements.txt b/samples/migrations/snippets/requirements.txt
@@ -0,0 +1,11 @@
+langchain-google-alloydb-pg==0.8.0
+langchain-core==0.3.25
+# Pinecone has a grpc option
+# pinecone[grpc]==5.0.1
+pinecone==5.4.2
+weaviate-client==4.10.2
+langchain-chroma==0.1.4
+qdrant-client==1.12.1
+pymilvus==2.5.0
+protobuf==5.29.1
+grpcio-tools==1.67.1