From 1982f5c6156700281cd54e9f35e53593bc1f327d Mon Sep 17 00:00:00 2001 From: anakin87 Date: Thu, 15 Feb 2024 15:48:40 +0100 Subject: [PATCH] gradient - update notebook for new secret mgmt --- ...beders-and-generators-for-notion-rag.ipynb | 202 +++++++++--------- 1 file changed, 100 insertions(+), 102 deletions(-) diff --git a/notebooks/gradient-embeders-and-generators-for-notion-rag.ipynb b/notebooks/gradient-embeders-and-generators-for-notion-rag.ipynb index e81c993..2589898 100644 --- a/notebooks/gradient-embeders-and-generators-for-notion-rag.ipynb +++ b/notebooks/gradient-embeders-and-generators-for-notion-rag.ipynb @@ -1,22 +1,10 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "authorship_tag": "ABX9TyP6v9IBjf1C1AJvH3peU6XG" - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, "cells": [ { "cell_type": "markdown", + "metadata": { + "id": "_coq_qCuItbN" + }, "source": [ "# Use Gradient Models for Notion RAG\n", "\n", @@ -24,10 +12,7 @@ "- Creating a custom Haystack component called `NotionExporter`\n", "- Building an indexing pipeline to write our Notion pages into an `InMemoryDocumentStore` with embeddings\n", "- Build a custom RAG pipeline to do question answering on our Notion pages" - ], - "metadata": { - "id": "_coq_qCuItbN" - } + ] }, { "cell_type": "code", @@ -47,70 +32,76 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JRogtDXaAMIF" + }, + "outputs": [], "source": [ "import getpass\n", "import os\n", "\n", "notion_api_key = getpass.getpass(\"Enter Notion API key:\")\n", - "gradient_access_token = getpass.getpass(\"Gradient Access token:\")" - ], - "metadata": { - "id": "JRogtDXaAMIF" - }, - "execution_count": null, - "outputs": [] + "os.environ[\"GRADIENT_ACCESS_TOKEN\"] = getpass.getpass(\"Gradient Access token:\")\n", + "os.environ[\"GRADIENT_WORKSPACE_ID\"] = getpass.getpass(\"Gradient Workspace Id:\")" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "aPXd4RjEKzBG" + }, "source": [ "### Test the NotionExporter\n", "\n", "- You can follow the steps outlined in the Notion [documentation](https://developers.notion.com/docs/create-a-notion-integration#create-your-integration-in-notion) to create a new Notion integration, connect it to your pages, and obtain your API token.\n", "- Page IDs in Notion are the tailing numbers at the end of the page URL, separated by a '-' at 8-4-4-4-12 digits" - ], - "metadata": { - "id": "aPXd4RjEKzBG" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3_blVmFsAGSf" + }, + "outputs": [], "source": [ "from notion_haystack import NotionExporter\n", "\n", "exporter = NotionExporter(api_token=notion_api_key)" - ], - "metadata": { - "id": "3_blVmFsAGSf" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "exporter.run(page_ids=[\"6f98e9a6-a880-40e9-b191-1c4f41efec87\"])" - ], + "execution_count": null, "metadata": { "id": "cK8VTkJlAfLs" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "exporter.run(page_ids=[\"6f98e9a6-a880-40e9-b191-1c4f41efec87\"])" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "KlMkLSVjJVoW" + }, "source": [ "## Build an Indexing Pipeline to Write Notion Pages to a Document Store\n", "\n", "- Documentation on [`GradientDocumentEmbedder`](https://haystack.deepset.ai/integrations/gradient#usage)\n", "- Documentation on [`DocumentSplitter`](https://docs.haystack.deepset.ai/v2.0/docs/documentsplitter)\n", "- Documentation on [`DocumentWriter`](https://docs.haystack.deepset.ai/v2.0/docs/documentwriter)" - ], - "metadata": { - "id": "KlMkLSVjJVoW" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bZefXI2cBRME" + }, + "outputs": [], "source": [ "from haystack.components.preprocessors import DocumentSplitter\n", "from haystack_integrations.components.embedders.gradient import GradientDocumentEmbedder\n", @@ -121,17 +112,17 @@ "document_store = InMemoryDocumentStore()\n", "exporter = NotionExporter(api_token=notion_api_key)\n", "splitter = DocumentSplitter()\n", - "document_embedder = GradientDocumentEmbedder(access_token=gradient_access_token, workspace_id=\"9ee7071c-2fa9-4155-8edd-94ed371f1750_workspace\")\n", + "document_embedder = GradientDocumentEmbedder()\n", "writer = DocumentWriter(document_store=document_store)\n" - ], - "metadata": { - "id": "bZefXI2cBRME" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JZVzXyHdDxwg" + }, + "outputs": [], "source": [ "from haystack import Pipeline\n", "\n", @@ -140,54 +131,53 @@ "indexing_pipeline.add_component(instance=splitter, name=\"splitter\")\n", "indexing_pipeline.add_component(instance=document_embedder, name=\"document_embedder\")\n", "indexing_pipeline.add_component(instance=writer, name=\"writer\")" - ], - "metadata": { - "id": "JZVzXyHdDxwg" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ht3U0JNJEFW6" + }, + "outputs": [], "source": [ "indexing_pipeline.connect(\"exporter.documents\", \"splitter.documents\")\n", "indexing_pipeline.connect(\"splitter.documents\", \"document_embedder.documents\")\n", "indexing_pipeline.connect(\"document_embedder.documents\", \"writer.documents\")" - ], - "metadata": { - "id": "ht3U0JNJEFW6" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "indexing_pipeline.run(data={\"exporter\":{\"page_ids\": [\"6f98e9a6-a880-40e9-b191-1c4f41efec87\"]}})" - ], + "execution_count": null, "metadata": { "id": "mdn9N0XkEZBH" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "indexing_pipeline.run(data={\"exporter\":{\"page_ids\": [\"6f98e9a6-a880-40e9-b191-1c4f41efec87\"]}})" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "A7iIQrtJJ1-6" + }, "source": [ "## Build a RAG Pipeline with Cohere\n", "\n", "- Documentation on [`GradientTextEmbedder`](https://haystack.deepset.ai/integrations/gradient#usage)\n", "- Documentation on [`PromptBuilder`](https://docs.haystack.deepset.ai/v2.0/docs/promptbuilder)\n", "- Documentation on [`GradientGenerator`](GradientTextEmbedder)" - ], - "metadata": { - "id": "A7iIQrtJJ1-6" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tbX35iA3_ImP" + }, + "outputs": [], "source": [ - "import torch\n", "from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever\n", "from haystack.components.builders import PromptBuilder\n", "from haystack_integrations.components.embedders.gradient import GradientTextEmbedder\n", @@ -203,22 +193,20 @@ "\n", "Query: {{query}}\n", "\"\"\"\n", - "text_embedder = GradientTextEmbedder(access_token=gradient_access_token, workspace_id=\"9ee7071c-2fa9-4155-8edd-94ed371f1750_workspace\")\n", + "text_embedder = GradientTextEmbedder()\n", "retriever = InMemoryEmbeddingRetriever(document_store=document_store)\n", "prompt_builder = PromptBuilder(template=prompt)\n", - "generator = GradientGenerator(access_token=gradient_access_token,\n", - " workspace_id=\"9ee7071c-2fa9-4155-8edd-94ed371f1750_workspace\",\n", - " model_adapter_id=\"905db818-d031-4378-bd67-ac9804cb0961_model_adapter\",\n", + "generator = GradientGenerator(model_adapter_id=\"905db818-d031-4378-bd67-ac9804cb0961_model_adapter\",\n", " max_generated_token_count=350)\n" - ], - "metadata": { - "id": "tbX35iA3_ImP" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "71PMDYsUBOY8" + }, + "outputs": [], "source": [ "rag_pipeline = Pipeline()\n", "\n", @@ -230,36 +218,46 @@ "rag_pipeline.connect(\"text_embedder\", \"retriever\")\n", "rag_pipeline.connect(\"retriever.documents\", \"prompt_builder.documents\")\n", "rag_pipeline.connect(\"prompt_builder\", \"generator\")" - ], - "metadata": { - "id": "71PMDYsUBOY8" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zszY4bgzB2M0" + }, + "outputs": [], "source": [ "question = \"What are the steps for creating a custom component?\"\n", "result = rag_pipeline.run(data={\"text_embedder\":{\"text\": question},\n", " \"prompt_builder\":{\"query\": question}})" - ], - "metadata": { - "id": "zszY4bgzB2M0" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "print(result['generator']['replies'][0])" - ], + "execution_count": null, "metadata": { "id": "6EbRQ3OPCDs4" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "print(result['generator']['replies'][0])" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyP6v9IBjf1C1AJvH3peU6XG", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" } - ] + }, + "nbformat": 4, + "nbformat_minor": 0 }