Merge branch 'master' into code-packagetool
reachusama authored Jan 8, 2024
2 parents 25428b8 + 3a8ad90 commit 53d01eb
Showing 40 changed files with 1,067 additions and 291 deletions.
2 changes: 1 addition & 1 deletion docs/docs/expression_language/why.ipynb
@@ -1007,7 +1007,7 @@
"from langchain_openai import OpenAI\n",
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.runnables import RunnablePassthrough\n",
"from langchain_core.runnables import RunnablePassthrough, ConfigurableField\n",
"\n",
"os.environ[\"LANGCHAIN_API_KEY\"] = \"...\"\n",
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
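For context, the `ConfigurableField` import added here is the LCEL hook for exposing runtime-tunable parameters on a runnable. A minimal sketch of the usual pattern, not taken from this notebook; the field id, prompt, and temperature value are illustrative:

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import ConfigurableField
from langchain_openai import OpenAI

# Expose the model's temperature as a configurable field with a stable id.
llm = OpenAI(temperature=0).configurable_fields(
    temperature=ConfigurableField(
        id="llm_temperature",
        name="LLM temperature",
        description="Sampling temperature used by the model",
    )
)

chain = (
    ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
    | llm
    | StrOutputParser()
)

# Override the field for a single call without rebuilding the chain.
chain.with_config(configurable={"llm_temperature": 0.9}).invoke({"topic": "ice cream"})
```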
13 changes: 6 additions & 7 deletions docs/docs/get_started/quickstart.mdx
@@ -221,25 +221,24 @@ embeddings = OllamaEmbeddings()
</Tabs>

Now, we can use this embedding model to ingest documents into a vectorstore.
- We will use a simple local vectorstore, [DocArray InMemorySearch](/docs/integrations/vectorstores/docarray_in_memory), for simplicity's sake.
+ We will use a simple local vectorstore, [FAISS](/docs/integrations/vectorstores/faiss), for simplicity's sake.

First we need to install the required packages for that:

```shell
- pip install docarray
- pip install tiktoken
+ pip install faiss-cpu
```

Then we can build our index:

```python
- from langchain_community.vectorstores import DocArrayInMemorySearch
+ from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter


text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
- vector = DocArrayInMemorySearch.from_documents(documents, embeddings)
+ vector = FAISS.from_documents(documents, embeddings)
```

Now that we have this data indexed in a vectorstore, we will create a retrieval chain.
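For reference, the retrieval chain this sentence points to is typically assembled with the `create_stuff_documents_chain` and `create_retrieval_chain` helpers. A minimal sketch, not part of the diff; it assumes the `llm` and `retriever` objects defined elsewhere in the quickstart, and the prompt wording is illustrative:

```python
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Prompt that stuffs the retrieved documents into {context}.
prompt = ChatPromptTemplate.from_template(
    """Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}"""
)

# Combine the retrieved documents with the prompt and the model.
document_chain = create_stuff_documents_chain(llm, prompt)

# Put the vectorstore retriever in front of the document chain.
retrieval_chain = create_retrieval_chain(retriever, document_chain)

response = retrieval_chain.invoke({"input": "How can LangSmith help with testing?"})
print(response["answer"])
```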
@@ -483,7 +482,7 @@ from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
- from langchain_community.vectorstores import DocArrayInMemorySearch
+ from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool
from langchain_community.tools.tavily_search import TavilySearchResults
@@ -501,7 +500,7 @@ docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
embeddings = OpenAIEmbeddings()
- vector = DocArrayInMemorySearch.from_documents(documents, embeddings)
+ vector = FAISS.from_documents(documents, embeddings)
retriever = vector.as_retriever()

# 2. Create Tools
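The collapsed `# 2. Create Tools` step above typically wraps the retriever and a web search tool for the agent. A minimal sketch, assuming the `retriever` built above and the imports shown in this diff; the tool name and description strings are illustrative:

```python
from langchain.tools.retriever import create_retriever_tool
from langchain_community.tools.tavily_search import TavilySearchResults

# Wrap the vectorstore retriever as an agent tool.
retriever_tool = create_retriever_tool(
    retriever,
    "langsmith_search",
    "Search for information about LangSmith. Use this for any LangSmith question.",
)

# Web search tool (requires a TAVILY_API_KEY in the environment).
search = TavilySearchResults()

tools = [retriever_tool, search]
```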
185 changes: 185 additions & 0 deletions docs/docs/integrations/document_loaders/astradb.ipynb
@@ -0,0 +1,185 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "vm8vn9t8DvC_"
},
"source": [
"# AstraDB"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available through an easy-to-use JSON API."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "5WjXERXzFEhg"
},
"source": [
"## Overview"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "juAmbgoWD17u"
},
"source": [
"The AstraDB Document Loader returns a list of Langchain Documents from an AstraDB database.\n",
"\n",
"The Loader takes the following parameters:\n",
"\n",
"* `api_endpoint`: AstraDB API endpoint. Looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
"* `token`: AstraDB token. Looks like `AstraCS:6gBhNmsk135....`\n",
"* `collection_name` : AstraDB collection name\n",
"* `namespace`: (Optional) AstraDB namespace\n",
"* `filter_criteria`: (Optional) Filter used in the find query\n",
"* `projection`: (Optional) Projection used in the find query\n",
"* `find_options`: (Optional) Options used in the find query\n",
"* `nb_prefetched`: (Optional) Number of documents pre-fetched by the loader\n",
"* `extraction_function`: (Optional) A function to convert the AstraDB document to the LangChain `page_content` string. Defaults to `json.dumps`\n",
"\n",
"The following metadata is set to the LangChain Documents metadata output:\n",
"\n",
"```python\n",
"{\n",
" metadata : {\n",
" \"namespace\": \"...\", \n",
" \"api_endpoint\": \"...\", \n",
" \"collection\": \"...\"\n",
" }\n",
"}\n",
"```"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load documents with the Document Loader"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import AstraDBLoader"
]
},
{
"cell_type": "code",
"outputs": [],
"source": [
"from getpass import getpass\n",
"\n",
"ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n",
"ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-08T12:41:22.643335Z",
"start_time": "2024-01-08T12:40:57.759116Z"
}
},
"execution_count": 4
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-08T12:42:25.395162Z",
"start_time": "2024-01-08T12:42:25.391387Z"
}
},
"outputs": [],
"source": [
"loader = AstraDBLoader(\n",
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
" collection_name=\"movie_reviews\",\n",
" projection={\"title\": 1, \"reviewtext\": 1},\n",
" find_options={\"limit\": 10},\n",
")"
]
},
{
"cell_type": "code",
"outputs": [],
"source": [
"docs = loader.load()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-08T12:42:30.236489Z",
"start_time": "2024-01-08T12:42:29.612133Z"
}
},
"execution_count": 7
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-08T12:42:31.369394Z",
"start_time": "2024-01-08T12:42:31.359003Z"
}
},
"outputs": [
{
"data": {
"text/plain": "Document(page_content='{\"_id\": \"659bdffa16cbc4586b11a423\", \"title\": \"Dangerous Men\", \"reviewtext\": \"\\\\\"Dangerous Men,\\\\\" the picture\\'s production notes inform, took 26 years to reach the big screen. After having seen it, I wonder: What was the rush?\"}', metadata={'namespace': 'default_keyspace', 'api_endpoint': 'https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com', 'collection': 'movie_reviews'})"
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs[0]"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [
"5WjXERXzFEhg"
],
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
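To round out the parameter list in the new loader page above, a hedged sketch of the optional `filter_criteria` and `extraction_function` arguments; the filter value and the lambda are illustrative, and the endpoint and token variables are the ones collected earlier in the notebook:

```python
from langchain_community.document_loaders import AstraDBLoader

loader = AstraDBLoader(
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    token=ASTRA_DB_APPLICATION_TOKEN,
    collection_name="movie_reviews",
    # Only fetch reviews for a single title (illustrative filter).
    filter_criteria={"title": "Dangerous Men"},
    projection={"reviewtext": 1},
    find_options={"limit": 5},
    # Use the raw review text as page_content instead of the json.dumps default.
    extraction_function=lambda doc: doc.get("reviewtext", ""),
)

docs = loader.load()
```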