diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index 127721f4693..2592e0780b9 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -9,12 +9,17 @@ "dataType": "ParseData", "id": "ParseData-1lOsZ", "name": "text", - "output_types": ["Message"] + "output_types": [ + "Message" + ] }, "targetHandle": { "fieldName": "context", "id": "Prompt-o2SL4", - "inputTypes": ["Message", "Text"], + "inputTypes": [ + "Message", + "Text" + ], "type": "str" } }, @@ -32,12 +37,16 @@ "dataType": "Prompt", "id": "Prompt-o2SL4", "name": "prompt", - "output_types": ["Message"] + "output_types": [ + "Message" + ] }, "targetHandle": { "fieldName": "input_value", "id": "OpenAIModel-yjcwf", - "inputTypes": ["Message"], + "inputTypes": [ + "Message" + ], "type": "str" } }, @@ -55,12 +64,16 @@ "dataType": "OpenAIModel", "id": "OpenAIModel-yjcwf", "name": "text_output", - "output_types": ["Message"] + "output_types": [ + "Message" + ] }, "targetHandle": { "fieldName": "input_value", "id": "ChatOutput-p7okC", - "inputTypes": ["Message"], + "inputTypes": [ + "Message" + ], "type": "str" } }, @@ -78,12 +91,17 @@ "dataType": "ChatInput", "id": "ChatInput-uBsVA", "name": "message", - "output_types": ["Message"] + "output_types": [ + "Message" + ] }, "targetHandle": { "fieldName": "question", "id": "Prompt-o2SL4", - "inputTypes": ["Message", "Text"], + "inputTypes": [ + "Message", + "Text" + ], "type": "str" } }, @@ -101,12 +119,16 @@ "dataType": "File", "id": "File-jTMwG", "name": "data", - "output_types": ["Data"] + "output_types": [ + "Data" + ] }, "targetHandle": { "fieldName": "data_inputs", "id": "SplitText-bByhd", - "inputTypes": ["Data"], + "inputTypes": [ + "Data" + ], "type": "other" } }, @@ -122,12 +144,16 @@ "dataType": "OpenAIEmbeddings", "id": "OpenAIEmbeddings-b229k", "name": "embeddings", - "output_types": ["Embeddings"] + "output_types": [ + "Embeddings" + ] }, "targetHandle": { "fieldName": "embedding_model", "id": "AstraDB-0959q", - "inputTypes": ["Embeddings"], + "inputTypes": [ + "Embeddings" + ], "type": "other" } }, @@ -143,12 +169,16 @@ "dataType": "SplitText", "id": "SplitText-bByhd", "name": "chunks", - "output_types": ["Data"] + "output_types": [ + "Data" + ] }, "targetHandle": { "fieldName": "ingest_data", "id": "AstraDB-0959q", - "inputTypes": ["Data"], + "inputTypes": [ + "Data" + ], "type": "other" } }, @@ -164,12 +194,16 @@ "dataType": "OpenAIEmbeddings", "id": "OpenAIEmbeddings-qVkNT", "name": "embeddings", - "output_types": ["Embeddings"] + "output_types": [ + "Embeddings" + ] }, "targetHandle": { "fieldName": "embedding_model", "id": "AstraDB-3Vxgl", - "inputTypes": ["Embeddings"], + "inputTypes": [ + "Embeddings" + ], "type": "other" } }, @@ -185,12 +219,16 @@ "dataType": "ChatInput", "id": "ChatInput-uBsVA", "name": "message", - "output_types": ["Message"] + "output_types": [ + "Message" + ] }, "targetHandle": { "fieldName": "search_query", "id": "AstraDB-3Vxgl", - "inputTypes": ["Message"], + "inputTypes": [ + "Message" + ], "type": "str" } }, @@ -206,12 +244,16 @@ "dataType": "AstraDB", "id": "AstraDB-3Vxgl", "name": "search_results", - "output_types": ["Data"] + "output_types": [ + "Data" + ] }, "targetHandle": { "fieldName": "data", "id": "ParseData-1lOsZ", - "inputTypes": ["Data"], + "inputTypes": [ + "Data" + ], "type": "other" } }, @@ 
-229,7 +271,9 @@ "display_name": "Chat Input", "id": "ChatInput-uBsVA", "node": { - "base_classes": ["Message"], + "base_classes": [ + "Message" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -258,7 +302,9 @@ "method": "message_response", "name": "message", "selected": "Message", - "types": ["Message"], + "types": [ + "Message" + ], "value": "__UNDEFINED__" } ], @@ -271,7 +317,9 @@ "display_name": "Background Color", "dynamic": false, "info": "The background color of the icon.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "background_color", @@ -290,7 +338,9 @@ "display_name": "Icon", "dynamic": false, "info": "The icon of the message.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "chat_icon", @@ -367,7 +417,9 @@ "display_name": "Text", "dynamic": false, "info": "Message to be passed as input.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "multiline": true, @@ -387,7 +439,10 @@ "dynamic": false, "info": "Type of sender.", "name": "sender", - "options": ["Machine", "User"], + "options": [ + "Machine", + "User" + ], "placeholder": "", "required": false, "show": true, @@ -401,7 +456,9 @@ "display_name": "Sender Name", "dynamic": false, "info": "Name of the sender.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "sender_name", @@ -419,7 +476,9 @@ "display_name": "Session ID", "dynamic": false, "info": "The session ID of the chat. If empty, the current session ID parameter will be used.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "session_id", @@ -453,7 +512,9 @@ "display_name": "Text Color", "dynamic": false, "info": "The text color of the name", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "text_color", @@ -491,7 +552,9 @@ "display_name": "Parse Data", "id": "ParseData-1lOsZ", "node": { - "base_classes": ["Message"], + "base_classes": [ + "Message" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -499,7 +562,11 @@ "display_name": "Parse Data", "documentation": "", "edited": false, - "field_order": ["data", "template", "sep"], + "field_order": [ + "data", + "template", + "sep" + ], "frozen": false, "icon": "braces", "legacy": false, @@ -513,7 +580,9 @@ "method": "parse_data", "name": "text", "selected": "Message", - "types": ["Message"], + "types": [ + "Message" + ], "value": "__UNDEFINED__" }, { @@ -522,7 +591,9 @@ "method": "parse_data_as_list", "name": "data_list", "selected": "Data", - "types": ["Data"], + "types": [ + "Data" + ], "value": "__UNDEFINED__" } ], @@ -552,7 +623,9 @@ "display_name": "Data", "dynamic": false, "info": "The data to convert to text.", - "input_types": ["Data"], + "input_types": [ + "Data" + ], "list": true, "name": "data", "placeholder": "", @@ -585,7 +658,9 @@ "display_name": "Template", "dynamic": false, "info": "The template to use for formatting the data. 
It can contain the keys {text}, {data} or any other key in the Data.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "multiline": true, @@ -624,18 +699,25 @@ "display_name": "Prompt", "id": "Prompt-o2SL4", "node": { - "base_classes": ["Message"], + "base_classes": [ + "Message" + ], "beta": false, "conditional_paths": [], "custom_fields": { - "template": ["context", "question"] + "template": [ + "context", + "question" + ] }, "description": "Create a prompt template with dynamic variables.", "display_name": "Prompt", "documentation": "", "edited": false, "error": null, - "field_order": ["template"], + "field_order": [ + "template" + ], "frozen": false, "full_path": null, "icon": "prompts", @@ -654,7 +736,9 @@ "method": "build_prompt", "name": "prompt", "selected": "Message", - "types": ["Message"], + "types": [ + "Message" + ], "value": "__UNDEFINED__" } ], @@ -687,7 +771,10 @@ "fileTypes": [], "file_path": "", "info": "", - "input_types": ["Message", "Text"], + "input_types": [ + "Message", + "Text" + ], "list": false, "load_from_db": false, "multiline": true, @@ -707,7 +794,10 @@ "fileTypes": [], "file_path": "", "info": "", - "input_types": ["Message", "Text"], + "input_types": [ + "Message", + "Text" + ], "list": false, "load_from_db": false, "multiline": true, @@ -741,7 +831,9 @@ "display_name": "Tool Placeholder", "dynamic": false, "info": "A placeholder input for tool mode.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "tool_placeholder", @@ -781,7 +873,9 @@ "display_name": "Split Text", "id": "SplitText-bByhd", "node": { - "base_classes": ["Data"], + "base_classes": [ + "Data" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -808,7 +902,9 @@ "method": "split_text", "name": "chunks", "selected": "Data", - "types": ["Data"], + "types": [ + "Data" + ], "value": "__UNDEFINED__" } ], @@ -868,7 +964,9 @@ "display_name": "Data Inputs", "dynamic": false, "info": "The data to split.", - "input_types": ["Data"], + "input_types": [ + "Data" + ], "list": true, "name": "data_inputs", "placeholder": "", @@ -884,7 +982,9 @@ "display_name": "Separator", "dynamic": false, "info": "The character to split on. 
Defaults to newline.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "separator", @@ -986,7 +1086,10 @@ "data": { "id": "OpenAIModel-yjcwf", "node": { - "base_classes": ["LanguageModel", "Message"], + "base_classes": [ + "LanguageModel", + "Message" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -1023,7 +1126,9 @@ "name": "text_output", "required_inputs": [], "selected": "Message", - "types": ["Message"], + "types": [ + "Message" + ], "value": "__UNDEFINED__" }, { @@ -1033,7 +1138,9 @@ "name": "model_output", "required_inputs": [], "selected": "LanguageModel", - "types": ["LanguageModel"], + "types": [ + "LanguageModel" + ], "value": "__UNDEFINED__" } ], @@ -1046,7 +1153,9 @@ "display_name": "OpenAI API Key", "dynamic": false, "info": "The OpenAI API Key to use for the OpenAI model.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "api_key", "password": true, @@ -1081,7 +1190,9 @@ "display_name": "Input", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "input_value", @@ -1229,7 +1340,9 @@ "display_name": "System Message", "dynamic": false, "info": "System message to pass to the model.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "system_message", @@ -1285,7 +1398,9 @@ "display_name": "Chat Output", "id": "ChatOutput-p7okC", "node": { - "base_classes": ["Message"], + "base_classes": [ + "Message" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -1317,7 +1432,9 @@ "method": "message_response", "name": "message", "selected": "Message", - "types": ["Message"], + "types": [ + "Message" + ], "value": "__UNDEFINED__" } ], @@ -1330,7 +1447,9 @@ "display_name": "Background Color", "dynamic": false, "info": "The background color of the icon.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "background_color", @@ -1350,7 +1469,9 @@ "display_name": "Icon", "dynamic": false, "info": "The icon of the message.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "chat_icon", @@ -1388,7 +1509,9 @@ "display_name": "Data Template", "dynamic": false, "info": "Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "data_template", @@ -1408,7 +1531,9 @@ "display_name": "Text", "dynamic": false, "info": "Message to be passed as output.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "input_value", @@ -1429,7 +1554,10 @@ "dynamic": false, "info": "Type of sender.", "name": "sender", - "options": ["Machine", "User"], + "options": [ + "Machine", + "User" + ], "placeholder": "", "required": false, "show": true, @@ -1445,7 +1573,9 @@ "display_name": "Sender Name", "dynamic": false, "info": "Name of the sender.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "sender_name", @@ -1465,7 +1595,9 @@ "display_name": "Session ID", "dynamic": false, "info": "The session ID of the chat. 
If empty, the current session ID parameter will be used.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "session_id", @@ -1501,7 +1633,9 @@ "display_name": "Text Color", "dynamic": false, "info": "The text color of the name", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "text_color", @@ -1539,7 +1673,9 @@ "data": { "id": "OpenAIEmbeddings-qVkNT", "node": { - "base_classes": ["Embeddings"], + "base_classes": [ + "Embeddings" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -1584,7 +1720,9 @@ "name": "embeddings", "required_inputs": [], "selected": "Embeddings", - "types": ["Embeddings"], + "types": [ + "Embeddings" + ], "value": "__UNDEFINED__" } ], @@ -1613,7 +1751,9 @@ "display_name": "Client", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "client", @@ -1683,7 +1823,9 @@ "display_name": "Deployment", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "deployment", @@ -1789,7 +1931,9 @@ "display_name": "OpenAI API Base", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_api_base", @@ -1809,7 +1953,9 @@ "display_name": "OpenAI API Key", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "openai_api_key", "password": true, @@ -1826,7 +1972,9 @@ "display_name": "OpenAI API Type", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_api_type", @@ -1846,7 +1994,9 @@ "display_name": "OpenAI API Version", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_api_version", @@ -1866,7 +2016,9 @@ "display_name": "OpenAI Organization", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_organization", @@ -1886,7 +2038,9 @@ "display_name": "OpenAI Proxy", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_proxy", @@ -1970,7 +2124,9 @@ "display_name": "TikToken Model Name", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "tiktoken_model_name", @@ -2040,7 +2196,9 @@ "data": { "id": "OpenAIEmbeddings-b229k", "node": { - "base_classes": ["Embeddings"], + "base_classes": [ + "Embeddings" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -2085,7 +2243,9 @@ "name": "embeddings", "required_inputs": [], "selected": "Embeddings", - "types": ["Embeddings"], + "types": [ + "Embeddings" + ], "value": "__UNDEFINED__" } ], @@ -2114,7 +2274,9 @@ "display_name": "Client", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "client", @@ -2184,7 +2346,9 @@ "display_name": "Deployment", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": 
"deployment", @@ -2290,7 +2454,9 @@ "display_name": "OpenAI API Base", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_api_base", @@ -2310,7 +2476,9 @@ "display_name": "OpenAI API Key", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "openai_api_key", "password": true, @@ -2327,7 +2495,9 @@ "display_name": "OpenAI API Type", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_api_type", @@ -2347,7 +2517,9 @@ "display_name": "OpenAI API Version", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_api_version", @@ -2367,7 +2539,9 @@ "display_name": "OpenAI Organization", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_organization", @@ -2387,7 +2561,9 @@ "display_name": "OpenAI Proxy", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_proxy", @@ -2471,7 +2647,9 @@ "display_name": "TikToken Model Name", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "tiktoken_model_name", @@ -2509,7 +2687,9 @@ "data": { "id": "File-jTMwG", "node": { - "base_classes": ["Data"], + "base_classes": [ + "Data" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -2537,7 +2717,9 @@ "name": "data", "required_inputs": [], "selected": "Data", - "types": ["Data"], + "types": [ + "Data" + ], "value": "__UNDEFINED__" } ], @@ -2600,7 +2782,10 @@ "display_name": "Server File Path", "dynamic": false, "info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supercedes 'Path' but supports same file types.", - "input_types": ["Data", "Message"], + "input_types": [ + "Data", + "Message" + ], "list": true, "name": "file_path", "placeholder": "", @@ -2825,7 +3010,9 @@ "data": { "id": "AstraDB-0959q", "node": { - "base_classes": ["Data"], + "base_classes": [ + "Data" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -2869,7 +3056,9 @@ "token" ], "selected": "Data", - "types": ["Data"], + "types": [ + "Data" + ], "value": "__UNDEFINED__" } ], @@ -2899,7 +3088,9 @@ "display_name": "API Endpoint", "dynamic": false, "info": "The Astra DB API Endpoint to use. 
Overrides selection of database.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "api_endpoint", "password": true, @@ -2945,7 +3136,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import os\nfrom collections import defaultdict\n\nfrom astrapy import AstraDBAdmin, DataAPIClient\nfrom astrapy.admin import parse_api_endpoint\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n base_inputs = LCVectorStoreComponent.inputs\n if \"search_query\" not in [input_.name for input_ in base_inputs]:\n base_inputs.append(\n MessageTextInput(\n name=\"search_query\",\n display_name=\"Search Query\",\n tool_mode=True,\n )\n )\n if \"ingest_data\" not in [input_.name for input_ in base_inputs]:\n base_inputs.append(\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n )\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Database\",\n info=\"The Astra DB Database to use.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n options=[\"Default database\"],\n value=\"Default database\",\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n options=[\"+ Create new collection\"],\n value=\"+ Create new collection\",\n ),\n StrInput(\n name=\"collection_name_new\",\n display_name=\"Collection Name\",\n info=\"Name of the new collection to create.\",\n advanced=os.getenv(\"LANGFLOW_HOST\") is not None,\n required=os.getenv(\"LANGFLOW_HOST\") is None,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n *base_inputs,\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n 
advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def get_vectorize_providers(self):\n try:\n self.log(\"Dynamically updating list of Vectorize providers.\")\n\n # Get the admin object\n admin = AstraDBAdmin(token=self.token)\n db_admin = admin.get_database_admin(self.get_api_endpoint())\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching Vectorize providers: {e}\")\n\n return {}\n\n def get_database_list(self):\n # Get the admin object\n db_admin = AstraDBAdmin(token=self.token)\n db_list = list(db_admin.list_databases())\n\n # Generate the api endpoint for each database\n return {db.info.name: f\"https://{db.info.id}-{db.info.region}.apps.astra.datastax.com\" for db in db_list}\n\n def get_api_endpoint(self):\n # Get the database name (or endpoint)\n database = self.api_endpoint\n\n # If the database is not set, get the first database in the list\n if not database 
or database == \"Default database\":\n database, _ = next(iter(self.get_database_list().items()))\n\n # If the database is a URL, return it\n if database.startswith(\"https://\"):\n return database\n\n # Otherwise, get the URL from the database list\n return self.get_database_list().get(database)\n\n def get_database(self):\n try:\n client = DataAPIClient(token=self.token)\n\n return client.get_database(\n api_endpoint=self.get_api_endpoint(),\n token=self.token,\n )\n except Exception as e: # noqa: BLE001\n self.log(f\"Error getting database: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n if not self.token:\n return [\"Default database\"]\n try:\n databases = [\"Default database\", *list(self.get_database_list().keys())]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching databases: {e}\")\n\n return [\"Default database\"]\n\n return databases\n\n def _initialize_collection_options(self):\n database = self.get_database()\n if database is None:\n return [\"+ Create new collection\"]\n\n try:\n collections = [collection.name for collection in database.list_collections(keyspace=self.keyspace or None)]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching collections: {e}\")\n\n return [\"+ Create new collection\"]\n\n return [*collections, \"+ Create new collection\"]\n\n def get_collection_choice(self):\n collection_name = self.collection_name\n if collection_name == \"+ Create new collection\":\n return self.collection_name_new\n\n return collection_name\n\n def get_collection_options(self):\n # Only get the options if the collection exists\n database = self.get_database()\n if database is None:\n return None\n\n collection_name = self.get_collection_choice()\n\n try:\n collection = database.get_collection(collection_name, keyspace=self.keyspace or None)\n collection_options = collection.options()\n except Exception as _: # noqa: BLE001\n return None\n\n return collection_options.vector\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # Always attempt to update the database list\n if field_name in {\"token\", \"api_endpoint\", \"collection_name\"}:\n # Update the database selector\n build_config[\"api_endpoint\"][\"options\"] = self._initialize_database_options()\n\n # Set the default API endpoint if not set\n if build_config[\"api_endpoint\"][\"value\"] == \"Default database\":\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options\"][0]\n\n # Update the collection selector\n build_config[\"collection_name\"][\"options\"] = self._initialize_collection_options()\n\n # Update the choice of embedding model based on collection name\n if field_name == \"collection_name\":\n # Detect if it is a new collection\n is_new_collection = field_value == \"+ Create new collection\"\n\n # Set the advanced and required fields based on the collection choice\n build_config[\"embedding_choice\"].update(\n {\n \"advanced\": not is_new_collection,\n \"value\": \"Embedding Model\" if is_new_collection else build_config[\"embedding_choice\"].get(\"value\"),\n }\n )\n\n # Set the advanced field for the embedding model\n build_config[\"embedding_model\"][\"advanced\"] = not is_new_collection\n\n # Set the advanced and required fields for the new collection name\n build_config[\"collection_name_new\"].update(\n {\n \"advanced\": not is_new_collection,\n \"required\": is_new_collection,\n \"value\": \"\" if not is_new_collection else 
build_config[\"collection_name_new\"].get(\"value\"),\n }\n )\n\n # Get the collection options for the selected collection\n collection_options = self.get_collection_options()\n\n # If the collection options are available (DB exists), show the advanced options\n if collection_options:\n build_config[\"embedding_choice\"][\"advanced\"] = True\n\n if collection_options.service:\n # Remove unnecessary fields when a service is set\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": True},\n \"embedding_choice\": {\"value\": \"Astra Vectorize\"},\n }\n else:\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": False},\n \"embedding_provider\": {\"advanced\": False},\n \"embedding_choice\": {\"value\": \"Embedding Model\"},\n }\n\n # Apply updates to the build_config\n for key, value in updates.items():\n build_config[key].update(value)\n\n elif field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n build_config[\"embedding_model\"][\"advanced\"] = True\n\n # Update the providers mapping\n vectorize_providers = self.get_vectorize_providers()\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=vectorize_providers.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n build_config[\"embedding_model\"][\"advanced\"] = False\n\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n # Update the providers mapping\n vectorize_providers = self.get_vectorize_providers()\n model_options = vectorize_providers[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. 
\"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_mapping = self.get_vectorize_providers()\n provider_value = provider_mapping.get(self.embedding_provider, [None])[0] or kwargs.get(\"embedding_provider\")\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or {}), **kwargs.get(\"z_04_authentication\", {})}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n if authentication:\n provider_key = None\n authentication[\"providerKey\"] = authentication[\"providerKey\"].split(\".\")[0]\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. 
\"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Initialize parameters based on the collection name\n is_new_collection = self.get_collection_options() is None\n\n # Get the embedding model\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_choice == \"Embedding Model\" else {}\n\n # Use the embedding model if the choice is set to \"Embedding Model\"\n if self.embedding_choice == \"Astra Vectorize\" and is_new_collection:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Build the vectorize options dictionary\n dict_options = vectorize_options or self.build_vectorize_options(\n embedding_provider=getattr(self, \"embedding_provider\", None) or None,\n model=getattr(self, \"model\", None) or None,\n z_01_model_parameters=getattr(self, \"z_01_model_parameters\", None) or None,\n z_02_api_key_name=getattr(self, \"z_02_api_key_name\", None) or None,\n z_03_provider_api_key=getattr(self, \"z_03_provider_api_key\", None) or None,\n z_04_authentication=getattr(self, \"z_04_authentication\", {}) or {},\n )\n\n # Set the embedding dictionary\n embedding_params = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n # Get the running environment for Langflow\n environment = parse_api_endpoint(self.get_api_endpoint()).environment if self.get_api_endpoint() else None\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n if os.getenv(\"LANGFLOW_HOST\") is not None:\n langflow_prefix = \"ds-\"\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": not is_new_collection, # TODO: May want to expose this option\n \"content_field\": self.content_field or None,\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=self.get_api_endpoint(),\n namespace=self.keyspace or None,\n collection_name=self.get_collection_choice(),\n environment=environment,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params or {},\n **embedding_params or {},\n **self.astradb_vectorstore_kwargs or {},\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max 
Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "import os\n\nfrom astrapy import DataAPIClient\nfrom astrapy.admin import parse_api_endpoint\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n class NewDatabaseInput(DictInput):\n title: str = \"Create New Database\"\n description: str = \"Create a new database in Astra DB.\"\n db_names: list[str] = []\n status: str = \"\"\n collection_count: int = 0\n record_count: int = 0\n\n class NewCollectionInput(DictInput):\n title: str = \"Create New Collection\"\n description: str = \"Create a new collection in Astra DB.\"\n status: str = \"\"\n dimensions: int = 0\n model: str = \"\"\n similarity_metrics: list[str] = []\n icon: str = \"Collection\"\n\n base_inputs = LCVectorStoreComponent.inputs\n if \"search_query\" not in [input_.name for input_ in base_inputs]:\n base_inputs.append(\n MessageTextInput(\n name=\"search_query\",\n 
display_name=\"Search Query\",\n tool_mode=True,\n )\n )\n if \"ingest_data\" not in [input_.name for input_ in base_inputs]:\n base_inputs.append(\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n )\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"API Endpoint\",\n info=\"The Astra DB API Endpoint to use. Overrides selection of database.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"Select a database in Astra DB.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=[NewDatabaseInput(name=\"database_input\").__dict__],\n options=[],\n value=\"\",\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=[NewCollectionInput(name=\"collection_input\").__dict__],\n options=[],\n value=\"\",\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n *base_inputs,\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. 
\"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n def get_database_list(self):\n # Get the admin object\n client = DataAPIClient(token=self.token)\n admin_client = client.get_admin(token=self.token)\n db_list = list(admin_client.list_databases())\n\n # Generate the api endpoint for each database\n return {\n db.info.name: {\n \"api_endpoint\": f\"https://{db.info.id}-{db.info.region}.apps.astra.datastax.com\",\n \"collections\": len(list(client.get_database(db.info.id, token=self.token).list_collection_names())),\n \"records\": 0,\n }\n for db in db_list\n }\n\n def get_api_endpoint(self):\n # If the API endpoint is set, return it\n if self.api_endpoint:\n return self.api_endpoint\n\n # If the database is not set, nothing we can do.\n if not self.database_name:\n return None\n\n # Otherwise, get the URL from the database list\n return self.get_database_list().get(self.database_name)\n\n def get_database(self):\n try:\n client = DataAPIClient(token=self.token)\n\n return client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n )\n except Exception as e: # noqa: BLE001\n self.log(f\"Error getting database: {e}\")\n\n return None\n\n def collection_exists(self):\n try:\n client = DataAPIClient(token=self.token)\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n )\n return self.collection_name in list(database.list_collections())\n except Exception as e: # noqa: BLE001\n self.log(f\"Error getting collection status: {e}\")\n\n return False\n\n def _initialize_database_options(self):\n try:\n return [\n {\"name\": name, \"collections\": info[\"collections\"], \"records\": info[\"records\"]}\n for name, info in self.get_database_list().items()\n ]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching databases: {e}\")\n\n return []\n\n def _initialize_collection_options(self):\n database = self.get_database()\n if database is None:\n return []\n\n try:\n collection_list = list(database.list_collections())\n\n return [\n {\n \"name\": col.name,\n \"records\": 0,\n \"provider\": col.options.vector.service.provider if col.options.vector else \"\",\n \"model\": col.options.vector.service.model_name if col.options.vector else \"\",\n }\n for col in collection_list\n ]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching collections: {e}\")\n\n return []\n return [*collections, \"+ Create new collection\"]\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None): # noqa: ARG002\n # Refresh the collection name options\n build_config[\"database_name\"][\"options\"] = self._initialize_database_options()\n build_config[\"collection_name\"][\"options\"] = 
self._initialize_collection_options()\n return build_config\n\n def get_collection_choice(self):\n collection_name = self.collection_name\n if collection_name == \"+ Create new collection\":\n return self.collection_name_new\n\n return collection_name\n\n def get_collection_options(self):\n # Only get the options if the collection exists\n database = self.get_database()\n if database is None:\n return None\n\n collection_name = self.get_collection_choice()\n\n try:\n collection = database.get_collection(collection_name, keyspace=self.keyspace or None)\n collection_options = collection.options()\n except Exception as _: # noqa: BLE001\n return None\n\n return collection_options.vector\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # Always attempt to update the database list\n if field_name in {\"token\", \"api_endpoint\", \"collection_name\"}:\n # Update the database selector\n build_config[\"api_endpoint\"][\"options\"] = self._initialize_database_options()\n\n # Set the default API endpoint if not set\n if build_config[\"api_endpoint\"][\"value\"] == \"Default database\":\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options\"][0]\n\n # Update the collection selector\n build_config[\"collection_name\"][\"options\"] = self._initialize_collection_options()\n\n # Update the choice of embedding model based on collection name\n if field_name == \"collection_name\":\n # Detect if it is a new collection\n is_new_collection = field_value == \"+ Create new collection\"\n\n # Set the advanced and required fields based on the collection choice\n build_config[\"embedding_choice\"].update(\n {\n \"advanced\": not is_new_collection,\n \"value\": \"Embedding Model\" if is_new_collection else build_config[\"embedding_choice\"].get(\"value\"),\n }\n )\n\n # Set the advanced field for the embedding model\n build_config[\"embedding_model\"][\"advanced\"] = not is_new_collection\n\n # Set the advanced and required fields for the new collection name\n build_config[\"collection_name_new\"].update(\n {\n \"advanced\": not is_new_collection,\n \"required\": is_new_collection,\n \"value\": \"\" if not is_new_collection else build_config[\"collection_name_new\"].get(\"value\"),\n }\n )\n\n # Get the collection options for the selected collection\n collection_options = self.get_collection_options()\n\n # If the collection options are available (DB exists), show the advanced options\n if collection_options:\n build_config[\"embedding_choice\"][\"advanced\"] = True\n\n if collection_options.service:\n # Remove unnecessary fields when a service is set\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": True},\n \"embedding_choice\": {\"value\": \"Astra Vectorize\"},\n }\n else:\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": False},\n \"embedding_provider\": {\"advanced\": False},\n \"embedding_choice\": {\"value\": \"Embedding Model\"},\n }\n\n # Apply updates to the build_config\n for key, value in updates.items():\n build_config[key].update(value)\n\n elif field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n build_config[\"embedding_model\"][\"advanced\"] = True\n\n # Update the providers mapping\n vectorize_providers = 
self.get_vectorize_providers()\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=vectorize_providers.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n build_config[\"embedding_model\"][\"advanced\"] = False\n\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n # Update the providers mapping\n vectorize_providers = self.get_vectorize_providers()\n model_options = vectorize_providers[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. 
\"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get the running environment for Langflow\n environment = (\n parse_api_endpoint(self.get_api_endpoint()).environment if self.get_api_endpoint() is not None else None\n )\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n if os.getenv(\"LANGFLOW_HOST\") is not None:\n langflow_prefix = \"ds-\"\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": self.collection_exists(), # TODO: May want to expose this option\n \"content_field\": self.content_field or None,\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=self.get_api_endpoint(),\n namespace=self.keyspace or None,\n collection_name=self.collection_name,\n environment=environment,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or 
filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" }, "collection_name": { "_input_type": "DropdownInput", @@ -3041,7 +3232,9 @@ "display_name": "Embedding Model", "dynamic": false, "info": "Allows an embedding model configuration.", - "input_types": ["Embeddings"], + "input_types": [ + "Embeddings" + ], "list": false, "name": "embedding_model", "placeholder": "", @@ -3074,7 +3267,9 @@ "display_name": "Ingest Data", "dynamic": false, "info": "", - "input_types": ["Data"], + "input_types": [ + "Data" + ], "list": false, "name": "ingest_data", "placeholder": "", @@ -3126,7 +3321,9 @@ "display_name": "Search Query", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "multiline": true, @@ -3185,7 +3382,9 @@ "display_name": "Astra DB Application Token", "dynamic": false, "info": "Authentication token for accessing Astra DB.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "token", "password": true, @@ -3221,7 +3420,9 @@ "data": { "id": "AstraDB-3Vxgl", "node": { - "base_classes": ["Data"], + "base_classes": [ + "Data" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -3265,7 +3466,9 @@ "token" ], "selected": "Data", - "types": ["Data"], + "types": [ + "Data" + ], "value": "__UNDEFINED__" } ], @@ -3295,7 +3498,9 @@ "display_name": "API Endpoint", "dynamic": false, "info": "The Astra DB API Endpoint to use. 
Overrides selection of database.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "api_endpoint", "password": true, @@ -3341,7 +3546,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import os\nfrom collections import defaultdict\n\nfrom astrapy import AstraDBAdmin, DataAPIClient\nfrom astrapy.admin import parse_api_endpoint\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n base_inputs = LCVectorStoreComponent.inputs\n if \"search_query\" not in [input_.name for input_ in base_inputs]:\n base_inputs.append(\n MessageTextInput(\n name=\"search_query\",\n display_name=\"Search Query\",\n tool_mode=True,\n )\n )\n if \"ingest_data\" not in [input_.name for input_ in base_inputs]:\n base_inputs.append(\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n )\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Database\",\n info=\"The Astra DB Database to use.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n options=[\"Default database\"],\n value=\"Default database\",\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n options=[\"+ Create new collection\"],\n value=\"+ Create new collection\",\n ),\n StrInput(\n name=\"collection_name_new\",\n display_name=\"Collection Name\",\n info=\"Name of the new collection to create.\",\n advanced=os.getenv(\"LANGFLOW_HOST\") is not None,\n required=os.getenv(\"LANGFLOW_HOST\") is None,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n *base_inputs,\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n 
advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def get_vectorize_providers(self):\n try:\n self.log(\"Dynamically updating list of Vectorize providers.\")\n\n # Get the admin object\n admin = AstraDBAdmin(token=self.token)\n db_admin = admin.get_database_admin(self.get_api_endpoint())\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching Vectorize providers: {e}\")\n\n return {}\n\n def get_database_list(self):\n # Get the admin object\n db_admin = AstraDBAdmin(token=self.token)\n db_list = list(db_admin.list_databases())\n\n # Generate the api endpoint for each database\n return {db.info.name: f\"https://{db.info.id}-{db.info.region}.apps.astra.datastax.com\" for db in db_list}\n\n def get_api_endpoint(self):\n # Get the database name (or endpoint)\n database = self.api_endpoint\n\n # If the database is not set, get the first database in the list\n if not database 
or database == \"Default database\":\n database, _ = next(iter(self.get_database_list().items()))\n\n # If the database is a URL, return it\n if database.startswith(\"https://\"):\n return database\n\n # Otherwise, get the URL from the database list\n return self.get_database_list().get(database)\n\n def get_database(self):\n try:\n client = DataAPIClient(token=self.token)\n\n return client.get_database(\n api_endpoint=self.get_api_endpoint(),\n token=self.token,\n )\n except Exception as e: # noqa: BLE001\n self.log(f\"Error getting database: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n if not self.token:\n return [\"Default database\"]\n try:\n databases = [\"Default database\", *list(self.get_database_list().keys())]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching databases: {e}\")\n\n return [\"Default database\"]\n\n return databases\n\n def _initialize_collection_options(self):\n database = self.get_database()\n if database is None:\n return [\"+ Create new collection\"]\n\n try:\n collections = [collection.name for collection in database.list_collections(keyspace=self.keyspace or None)]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching collections: {e}\")\n\n return [\"+ Create new collection\"]\n\n return [*collections, \"+ Create new collection\"]\n\n def get_collection_choice(self):\n collection_name = self.collection_name\n if collection_name == \"+ Create new collection\":\n return self.collection_name_new\n\n return collection_name\n\n def get_collection_options(self):\n # Only get the options if the collection exists\n database = self.get_database()\n if database is None:\n return None\n\n collection_name = self.get_collection_choice()\n\n try:\n collection = database.get_collection(collection_name, keyspace=self.keyspace or None)\n collection_options = collection.options()\n except Exception as _: # noqa: BLE001\n return None\n\n return collection_options.vector\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # Always attempt to update the database list\n if field_name in {\"token\", \"api_endpoint\", \"collection_name\"}:\n # Update the database selector\n build_config[\"api_endpoint\"][\"options\"] = self._initialize_database_options()\n\n # Set the default API endpoint if not set\n if build_config[\"api_endpoint\"][\"value\"] == \"Default database\":\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options\"][0]\n\n # Update the collection selector\n build_config[\"collection_name\"][\"options\"] = self._initialize_collection_options()\n\n # Update the choice of embedding model based on collection name\n if field_name == \"collection_name\":\n # Detect if it is a new collection\n is_new_collection = field_value == \"+ Create new collection\"\n\n # Set the advanced and required fields based on the collection choice\n build_config[\"embedding_choice\"].update(\n {\n \"advanced\": not is_new_collection,\n \"value\": \"Embedding Model\" if is_new_collection else build_config[\"embedding_choice\"].get(\"value\"),\n }\n )\n\n # Set the advanced field for the embedding model\n build_config[\"embedding_model\"][\"advanced\"] = not is_new_collection\n\n # Set the advanced and required fields for the new collection name\n build_config[\"collection_name_new\"].update(\n {\n \"advanced\": not is_new_collection,\n \"required\": is_new_collection,\n \"value\": \"\" if not is_new_collection else 
build_config[\"collection_name_new\"].get(\"value\"),\n }\n )\n\n # Get the collection options for the selected collection\n collection_options = self.get_collection_options()\n\n # If the collection options are available (DB exists), show the advanced options\n if collection_options:\n build_config[\"embedding_choice\"][\"advanced\"] = True\n\n if collection_options.service:\n # Remove unnecessary fields when a service is set\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": True},\n \"embedding_choice\": {\"value\": \"Astra Vectorize\"},\n }\n else:\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": False},\n \"embedding_provider\": {\"advanced\": False},\n \"embedding_choice\": {\"value\": \"Embedding Model\"},\n }\n\n # Apply updates to the build_config\n for key, value in updates.items():\n build_config[key].update(value)\n\n elif field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n build_config[\"embedding_model\"][\"advanced\"] = True\n\n # Update the providers mapping\n vectorize_providers = self.get_vectorize_providers()\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=vectorize_providers.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n build_config[\"embedding_model\"][\"advanced\"] = False\n\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n # Update the providers mapping\n vectorize_providers = self.get_vectorize_providers()\n model_options = vectorize_providers[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. 
\"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_mapping = self.get_vectorize_providers()\n provider_value = provider_mapping.get(self.embedding_provider, [None])[0] or kwargs.get(\"embedding_provider\")\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or {}), **kwargs.get(\"z_04_authentication\", {})}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n if authentication:\n provider_key = None\n authentication[\"providerKey\"] = authentication[\"providerKey\"].split(\".\")[0]\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. 
\"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Initialize parameters based on the collection name\n is_new_collection = self.get_collection_options() is None\n\n # Get the embedding model\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_choice == \"Embedding Model\" else {}\n\n # Use the embedding model if the choice is set to \"Embedding Model\"\n if self.embedding_choice == \"Astra Vectorize\" and is_new_collection:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Build the vectorize options dictionary\n dict_options = vectorize_options or self.build_vectorize_options(\n embedding_provider=getattr(self, \"embedding_provider\", None) or None,\n model=getattr(self, \"model\", None) or None,\n z_01_model_parameters=getattr(self, \"z_01_model_parameters\", None) or None,\n z_02_api_key_name=getattr(self, \"z_02_api_key_name\", None) or None,\n z_03_provider_api_key=getattr(self, \"z_03_provider_api_key\", None) or None,\n z_04_authentication=getattr(self, \"z_04_authentication\", {}) or {},\n )\n\n # Set the embedding dictionary\n embedding_params = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n # Get the running environment for Langflow\n environment = parse_api_endpoint(self.get_api_endpoint()).environment if self.get_api_endpoint() else None\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n if os.getenv(\"LANGFLOW_HOST\") is not None:\n langflow_prefix = \"ds-\"\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": not is_new_collection, # TODO: May want to expose this option\n \"content_field\": self.content_field or None,\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=self.get_api_endpoint(),\n namespace=self.keyspace or None,\n collection_name=self.get_collection_choice(),\n environment=environment,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params or {},\n **embedding_params or {},\n **self.astradb_vectorstore_kwargs or {},\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max 
Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "import os\n\nfrom astrapy import DataAPIClient\nfrom astrapy.admin import parse_api_endpoint\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n class NewDatabaseInput(DictInput):\n title: str = \"Create New Database\"\n description: str = \"Create a new database in Astra DB.\"\n db_names: list[str] = []\n status: str = \"\"\n collection_count: int = 0\n record_count: int = 0\n\n class NewCollectionInput(DictInput):\n title: str = \"Create New Collection\"\n description: str = \"Create a new collection in Astra DB.\"\n status: str = \"\"\n dimensions: int = 0\n model: str = \"\"\n similarity_metrics: list[str] = []\n icon: str = \"Collection\"\n\n base_inputs = LCVectorStoreComponent.inputs\n if \"search_query\" not in [input_.name for input_ in base_inputs]:\n base_inputs.append(\n MessageTextInput(\n name=\"search_query\",\n 
display_name=\"Search Query\",\n tool_mode=True,\n )\n )\n if \"ingest_data\" not in [input_.name for input_ in base_inputs]:\n base_inputs.append(\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n )\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"API Endpoint\",\n info=\"The Astra DB API Endpoint to use. Overrides selection of database.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"Select a database in Astra DB.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=[NewDatabaseInput(name=\"database_input\").__dict__],\n options=[],\n value=\"\",\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=[NewCollectionInput(name=\"collection_input\").__dict__],\n options=[],\n value=\"\",\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n *base_inputs,\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. 
\"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n def get_database_list(self):\n # Get the admin object\n client = DataAPIClient(token=self.token)\n admin_client = client.get_admin(token=self.token)\n db_list = list(admin_client.list_databases())\n\n # Generate the api endpoint for each database\n return {\n db.info.name: {\n \"api_endpoint\": f\"https://{db.info.id}-{db.info.region}.apps.astra.datastax.com\",\n \"collections\": len(list(client.get_database(db.info.id, token=self.token).list_collection_names())),\n \"records\": 0,\n }\n for db in db_list\n }\n\n def get_api_endpoint(self):\n # If the API endpoint is set, return it\n if self.api_endpoint:\n return self.api_endpoint\n\n # If the database is not set, nothing we can do.\n if not self.database_name:\n return None\n\n # Otherwise, get the URL from the database list\n return self.get_database_list().get(self.database_name)\n\n def get_database(self):\n try:\n client = DataAPIClient(token=self.token)\n\n return client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n )\n except Exception as e: # noqa: BLE001\n self.log(f\"Error getting database: {e}\")\n\n return None\n\n def collection_exists(self):\n try:\n client = DataAPIClient(token=self.token)\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n )\n return self.collection_name in list(database.list_collections())\n except Exception as e: # noqa: BLE001\n self.log(f\"Error getting collection status: {e}\")\n\n return False\n\n def _initialize_database_options(self):\n try:\n return [\n {\"name\": name, \"collections\": info[\"collections\"], \"records\": info[\"records\"]}\n for name, info in self.get_database_list().items()\n ]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching databases: {e}\")\n\n return []\n\n def _initialize_collection_options(self):\n database = self.get_database()\n if database is None:\n return []\n\n try:\n collection_list = list(database.list_collections())\n\n return [\n {\n \"name\": col.name,\n \"records\": 0,\n \"provider\": col.options.vector.service.provider if col.options.vector else \"\",\n \"model\": col.options.vector.service.model_name if col.options.vector else \"\",\n }\n for col in collection_list\n ]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching collections: {e}\")\n\n return []\n return [*collections, \"+ Create new collection\"]\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None): # noqa: ARG002\n # Refresh the collection name options\n build_config[\"database_name\"][\"options\"] = self._initialize_database_options()\n build_config[\"collection_name\"][\"options\"] = 
self._initialize_collection_options()\n        return build_config\n\n    def get_collection_choice(self):\n        collection_name = self.collection_name\n        if collection_name == \"+ Create new collection\":\n            return self.collection_name_new\n\n        return collection_name\n\n    def get_collection_options(self):\n        # Only get the options if the collection exists\n        database = self.get_database()\n        if database is None:\n            return None\n\n        collection_name = self.get_collection_choice()\n\n        try:\n            collection = database.get_collection(collection_name, keyspace=self.keyspace or None)\n            collection_options = collection.options()\n        except Exception as _: # noqa: BLE001\n            return None\n\n        return collection_options.vector\n\n    @check_cached_vector_store\n    def build_vector_store(self):\n        try:\n            from langchain_astradb import AstraDBVectorStore\n        except ImportError as e:\n            msg = (\n                \"Could not import langchain Astra DB integration package. 
\"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get the running environment for Langflow\n environment = (\n parse_api_endpoint(self.get_api_endpoint()).environment if self.get_api_endpoint() is not None else None\n )\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n if os.getenv(\"LANGFLOW_HOST\") is not None:\n langflow_prefix = \"ds-\"\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": self.collection_exists(), # TODO: May want to expose this option\n \"content_field\": self.content_field or None,\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=self.get_api_endpoint(),\n namespace=self.keyspace or None,\n collection_name=self.collection_name,\n environment=environment,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or 
filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" }, "collection_name": { "_input_type": "DropdownInput", @@ -3437,7 +3642,9 @@ "display_name": "Embedding Model", "dynamic": false, "info": "Allows an embedding model configuration.", - "input_types": ["Embeddings"], + "input_types": [ + "Embeddings" + ], "list": false, "name": "embedding_model", "placeholder": "", @@ -3470,7 +3677,9 @@ "display_name": "Ingest Data", "dynamic": false, "info": "", - "input_types": ["Data"], + "input_types": [ + "Data" + ], "list": false, "name": "ingest_data", "placeholder": "", @@ -3522,7 +3731,9 @@ "display_name": "Search Query", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "multiline": true, @@ -3581,7 +3792,9 @@ "display_name": "Astra DB Application Token", "dynamic": false, "info": "Authentication token for accessing Astra DB.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "token", "password": true, @@ -3626,5 +3839,10 @@ "is_component": false, "last_tested_version": "1.1.1", "name": "Vector Store RAG", - "tags": ["openai", "astradb", "rag", "q-a"] -} + "tags": [ + "openai", + "astradb", + "rag", + "q-a" + ] +} \ No newline at end of file diff --git a/src/frontend/package-lock.json b/src/frontend/package-lock.json index aab53529a60..daceb8cd09f 100644 --- a/src/frontend/package-lock.json +++ b/src/frontend/package-lock.json @@ -836,7 +836,6 @@ }, "node_modules/@clack/prompts/node_modules/is-unicode-supported": { "version": "1.3.0", - "extraneous": true, "inBundle": true, "license": "MIT", "engines": {