diff --git a/notebooks/RAGoon_SimilaritySearch_cookbook.ipynb b/notebooks/RAGoon_SimilaritySearch_cookbook.ipynb index 24ba48e..f449a52 100644 --- a/notebooks/RAGoon_SimilaritySearch_cookbook.ipynb +++ b/notebooks/RAGoon_SimilaritySearch_cookbook.ipynb @@ -3,8 +3,7 @@ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "id": "view-in-github" }, "source": [ "" @@ -12,6 +11,9 @@ }, { "cell_type": "markdown", + "metadata": { + "id": "E1qMPnLpqcr3" + }, "source": [ "# RAGoon SimilaritySearch cookbook ⚡\n", "[![Python](https://img.shields.io/pypi/pyversions/tensorflow.svg)](https://badge.fury.io/py/tensorflow) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) ![Maintainer](https://img.shields.io/badge/maintainer-@louisbrulenaudet-blue)\n", @@ -45,13 +47,13 @@ "## Feedback\n", "If you have any feedback, please reach out at [louisbrulenaudet@icloud.com](mailto:louisbrulenaudet@icloud.com).\n", "\n" - ], - "metadata": { - "id": "E1qMPnLpqcr3" - } + ] }, { "cell_type": "markdown", + "metadata": { + "id": "-UbYh3VCrikh" + }, "source": [ "# Installation\n", "\n", @@ -66,20 +68,17 @@ "- `huggingface_hub`: Essential for interacting with Hugging Face’s model repository, enabling easy access to pre-trained models and datasets.\n", "\n", "These dependencies work together to empower RAGoon with advanced capabilities in natural language processing, machine learning, and web data processing, making it a versatile tool for developers and researchers in AI." 
- ], - "metadata": { - "id": "-UbYh3VCrikh" - } + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "u4Bq23-p34KP", - "outputId": "0f8edae4-fb9f-4faa-a7a9-b93a5ca0233b" + "outputId": "65f6250e-131d-4786-9d36-eee83f8af12d" }, "outputs": [ { @@ -87,7 +86,8 @@ "name": "stdout", "text": [ "Collecting ragoon\n", - " Downloading ragoon-0.0.8-py3-none-any.whl.metadata (7.7 kB)\n", + " Downloading ragoon-0.0.13-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: polars in /usr/local/lib/python3.10/dist-packages (0.20.2)\n", "Requirement already satisfied: beautifulsoup4==4.12.3 in /usr/local/lib/python3.10/dist-packages (from ragoon) (4.12.3)\n", "Collecting datasets==2.20.0 (from ragoon)\n", " Downloading datasets-2.20.0-py3-none-any.whl.metadata (19 kB)\n", @@ -129,7 +129,7 @@ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from ragoon) (4.42.4)\n", "Collecting tqdm==4.66.4 (from ragoon)\n", " Downloading tqdm-4.66.4-py3-none-any.whl.metadata (57 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.6/57.6 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.6/57.6 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting umap==0.1.1 (from ragoon)\n", " Downloading umap-0.1.1.tar.gz (3.2 kB)\n", " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", @@ -262,47 +262,47 @@ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.3.1->ragoon) (1.3.0)\n", "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth!=2.24.0,!=2.25.0,<3.0.0.dev0,>=1.32.0->google-api-python-client==2.126.0->ragoon) (0.6.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets==2.20.0->ragoon) (1.16.0)\n", - "Downloading ragoon-0.0.8-py3-none-any.whl (37 kB)\n", + "Downloading ragoon-0.0.13-py3-none-any.whl (38 kB)\n", "Downloading datasets-2.20.0-py3-none-any.whl (547 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.8/547.8 kB\u001b[0m \u001b[31m15.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.8/547.8 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m50.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m39.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading google_api_python_client-2.126.0-py2.py3-none-any.whl (12.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.6/12.6 MB\u001b[0m \u001b[31m73.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.6/12.6 MB\u001b[0m \u001b[31m40.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 
"\u001b[?25hDownloading groq-0.9.0-py3-none-any.whl (103 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.5/103.5 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.5/103.5 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading httpx-0.27.0-py3-none-any.whl (75 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading huggingface_hub-0.24.2-py3-none-any.whl (417 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m417.2/417.2 kB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m417.2/417.2 kB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading myst_parser-3.0.1-py3-none-any.whl (83 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.2/83.2 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.2/83.2 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading numpydoc-1.7.0-py3-none-any.whl (62 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m3.6 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading openai-1.37.1-py3-none-any.whl (337 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m337.0/337.0 kB\u001b[0m \u001b[31m22.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m337.0/337.0 kB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading plotly-5.23.0-py3-none-any.whl (17.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m36.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pydata_sphinx_theme-0.15.4-py3-none-any.whl (4.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m71.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m47.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pytest-8.3.2-py3-none-any.whl (341 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.8/341.8 kB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.8/341.8 kB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.4/13.4 MB\u001b[0m \u001b[31m68.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.4/13.4 MB\u001b[0m \u001b[31m47.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.1/227.1 kB\u001b[0m \u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.1/227.1 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading sphinx-7.4.7-py3-none-any.whl (3.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m70.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m68.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading sphinx_book_theme-1.1.3-py3-none-any.whl (430 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m430.1/430.1 kB\u001b[0m \u001b[31m27.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m430.1/430.1 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading tqdm-4.66.4-py3-none-any.whl (78 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.3/78.3 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.3/78.3 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading umap_learn-0.5.6-py3-none-any.whl (85 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.7/85.7 kB\u001b[0m \u001b[31m7.5 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.7/85.7 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading usearch-2.12.0-cp310-cp310-manylinux_2_28_x86_64.whl (1.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m54.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m53.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hUsing cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", "Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", "Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", @@ -315,32 +315,32 @@ "Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n", "Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", "Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m423.5 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m164.5 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading docutils-0.21.2-py3-none-any.whl (587 kB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m587.4/587.4 kB\u001b[0m \u001b[31m35.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m587.4/587.4 kB\u001b[0m \u001b[31m31.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading fsspec-2024.5.0-py3-none-any.whl (316 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m316.1/316.1 kB\u001b[0m \u001b[31m24.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m316.1/316.1 kB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.9/39.9 MB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.9/39.9 MB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pygments-2.18.0-py3-none-any.whl (1.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m58.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m34.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pynndescent-0.5.13-py3-none-any.whl (56 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.9/56.9 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.9/56.9 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 
"\u001b[?25hDownloading accessible_pygments-0.0.5-py3-none-any.whl (1.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m66.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m48.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading h11-0.14.0-py3-none-any.whl (58 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hUsing cached nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_x86_64.whl (19.7 MB)\n", "Building wheels for collected packages: overload, umap\n", " Building wheel for overload (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for overload: filename=overload-1.1-py3-none-any.whl size=5675 sha256=bd134871ea1dd33588cb0eb38faa5141ef6e5bf1581a2df164a784a16e4f7fee\n", + " Created wheel for overload: filename=overload-1.1-py3-none-any.whl size=5675 sha256=fc19f1374f24912b92374b22f9f5b3e729044ca044e7fff2217bc49eab1a5d89\n", " Stored in directory: /root/.cache/pip/wheels/c2/bd/04/b71278036f82f85e09d62b31d780f87df6f2a2dd378a185b3e\n", " Building wheel for umap (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for umap: filename=umap-0.1.1-py3-none-any.whl size=3542 sha256=64a33bfe9c627bd2873973ff4d15a11bd54edb51ca0ae728bb1e2e868e4cb9ff\n", + " Created wheel for umap: filename=umap-0.1.1-py3-none-any.whl size=3542 sha256=6a79b33adc6299e7cf3ef7d1567da3a10ce1694b726eada5ccc5435034949c7f\n", " Stored in directory: /root/.cache/pip/wheels/15/f1/28/53dcf7a309118ed35d810a5f9cb995217800f3f269ab5771cb\n", "Successfully built overload umap\n", "Installing collected packages: umap, overload, xxhash, tqdm, pytest, pygments, pyarrow, plotly, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, h11, fsspec, faiss-cpu, docutils, dill, usearch, sphinx, scikit-learn, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, huggingface-hub, httpcore, accessible-pygments, pynndescent, pydata-sphinx-theme, nvidia-cusolver-cu12, numpydoc, myst-parser, httpx, umap-learn, sphinx-book-theme, openai, groq, google-api-python-client, datasets, sentence-transformers, ragoon\n", @@ -393,7 +393,7 @@ "cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 17.0.0 which is incompatible.\n", "gcsfs 2024.6.1 requires fsspec==2024.6.1, but you have fsspec 2024.5.0 which is incompatible.\n", "ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 17.0.0 which is incompatible.\u001b[0m\u001b[31m\n", - 
"\u001b[0mSuccessfully installed accessible-pygments-0.0.5 datasets-2.20.0 dill-0.3.8 docutils-0.21.2 faiss-cpu-1.8.0 fsspec-2024.5.0 google-api-python-client-2.126.0 groq-0.9.0 h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 huggingface-hub-0.24.2 multiprocess-0.70.16 myst-parser-3.0.1 numpydoc-1.7.0 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.6.20 nvidia-nvtx-cu12-12.1.105 openai-1.37.1 overload-1.1 plotly-5.23.0 pyarrow-17.0.0 pydata-sphinx-theme-0.15.4 pygments-2.18.0 pynndescent-0.5.13 pytest-8.3.2 ragoon-0.0.8 scikit-learn-1.5.1 sentence-transformers-3.0.1 sphinx-7.4.7 sphinx-book-theme-1.1.3 tqdm-4.66.4 umap-0.1.1 umap-learn-0.5.6 usearch-2.12.0 xxhash-3.4.1\n" + "\u001b[0mSuccessfully installed accessible-pygments-0.0.5 datasets-2.20.0 dill-0.3.8 docutils-0.21.2 faiss-cpu-1.8.0 fsspec-2024.5.0 google-api-python-client-2.126.0 groq-0.9.0 h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 huggingface-hub-0.24.2 multiprocess-0.70.16 myst-parser-3.0.1 numpydoc-1.7.0 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.6.20 nvidia-nvtx-cu12-12.1.105 openai-1.37.1 overload-1.1 plotly-5.23.0 pyarrow-17.0.0 pydata-sphinx-theme-0.15.4 pygments-2.18.0 pynndescent-0.5.13 pytest-8.3.2 ragoon-0.0.13 scikit-learn-1.5.1 sentence-transformers-3.0.1 sphinx-7.4.7 sphinx-book-theme-1.1.3 tqdm-4.66.4 umap-0.1.1 umap-learn-0.5.6 usearch-2.12.0 xxhash-3.4.1\n" ] } ], @@ -403,14 +403,14 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 
2, "metadata": { "id": "r_zE8a8z4HMV" }, "outputs": [], "source": [ "import polars as pl\n", - "\n", + "import time\n", "from ragoon import (\n", " dataset_loader,\n", " SimilaritySearch,\n", @@ -420,19 +420,24 @@ }, { "cell_type": "markdown", + "metadata": { + "id": "Sb6QMUxtMC7x" + }, "source": [ "# Instance creation\n", "\n", "The `SimilaritySearch` class is instantiated with specific parameters to configure the embedding model and search infrastructure. The chosen model, `louisbrulenaudet/tsdae-lemone-mbert-base`, is likely a multilingual BERT model fine-tuned with TSDAE (Transformer-based Sequential Denoising Auto-Encoder) on a custom dataset. This model choice suggests a focus on multilingual capabilities and improved semantic representations.\n", "\n", "The `cuda` device specification leverages GPU acceleration, crucial for efficient processing of large datasets. The embedding dimension of `768` is typical for BERT-based models, representing a balance between expressiveness and computational efficiency. The `ip` (inner product) metric is selected for similarity comparisons, which is computationally faster than cosine similarity when vectors are normalized. The `i8` dtype indicates 8-bit integer quantization, a technique that significantly reduces memory usage and speeds up similarity search at the cost of a small accuracy trade-off." - ], - "metadata": { - "id": "Sb6QMUxtMC7x" - } + ] }, { "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "DO8LihEaL9Es" + }, + "outputs": [], "source": [ "instance = SimilaritySearch(\n", " model_name=\"louisbrulenaudet/tsdae-lemone-mbert-base\",\n", @@ -441,246 +446,87 @@ " metric=\"ip\",\n", " dtype=\"i8\"\n", ")" - ], + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zHehvJOdstmv" + }, + "source": [ + "## Dataset download and embeddings generation\n", + "\n", + "The encode method transforms raw text into dense vector representations. 
This process involves tokenization, where text is split into subword units, followed by passing these tokens through the neural network layers of the SentenceTransformer model. The resulting embeddings capture semantic information in a high-dimensional space, where similar concepts are positioned closer together. The method likely uses batching to efficiently process large datasets and may employ techniques like length sorting to optimize padding and reduce computational waste." + ] + }, + { + "cell_type": "code", + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 491, + "height": 214, "referenced_widgets": [ - "3b0740b7c1184b11af19a2575c2028d7", - "ed036c7e98d34ca1b1fefb5f7d7da690", - "041dcd84dd8047b8bb00dcf0a8605366", - "6ffed718be9d47b08b0f319823627692", - "bd764b4c8d3a4218a51ac6cfa016b176", - "459b6bd8f7804c6fa2cfbbf3574fea81", - "310db8d9db6e4b05b3340d9ac4e6a885", - "454e18ed66a543e88829d0eb1c7585dd", - "341d5f623a4e4d97a0b02a8b1f8372fa", - "bf1831cf76294173ad2f4506bab90601", - "d840ecb454ce401b9781ca181204320f", - "990877dd1448431ca676da862de76e66", - "215f503d83524f22b534383c76bb7aa5", - "57d8c511f130440ab43cd26946d73484", - "6ad57261fbcc4033bcf3c56e01f14099", - "9d590a1002364797a243818665f95cea", - "31967af398094a5b8003a78d7cdef06a", - "ee1e1101bcae43f8b938b70758635e28", - "fcadc705920c4dc0a965eff5b1de8088", - "9cedabb67be046b7b0dfb7ca89c8e299", - "91962d7fbd814ae0b6a6e10072f621bc", - "910e040f53c246048d85236b525d7e95", - "017b2cc0c5594dc6a4624b0bec38fe73", - "476cac6399c142efa95e85a5b159dd20", - "ce8b3efd425e43158a2b55f8e47b8d56", - "1fe1c159c31a4f8dba9beb0c53e49bbc", - "f121aa8e2b4a41feb4e7966f1a697d6f", - "66abd6f0b54c47548f3f8074d9af2af8", - "8347c3e484fa43b785c2a1392d4bf1aa", - "5a1a0dd9fd9f4f0195f8ead413dfd1a9", - "ef52f1ca67554e29a478abcd91eae44f", - "2629efe6d2044c29a22d9736b71523f4", - "2c0e040a88e948a2835e07dd3fac05f0", - "5d298e5197c94cb183ca668444f8ca81", - "1f4ea0c70e9c4923b7de5e307c01fcd5", - 
"8527969d2f9d4b8da907cb8d586c27eb", - "a10b2bca915841479e634c16af092d00", - "da023e01ccb34cd6a0d63ac05c4caf0f", - "cc58183628524f24b2f1cbe66527cf03", - "77a0828fab334bf280fde83577adbc36", - "874f00bd481e40c0802f3b6312d1e7c1", - "aab5b68dd3ce4d019b582188871e937e", - "f51d2b0311ad4fa895f1c7c5c2c2ecee", - "910a5796ac244e74b3fcd189166a7279", - "897fb40e1b0347a089aa4cb0fab582f3", - "744c5ae12aac4e9faa4a4845af24e3e9", - "c4616271c618426cbe1043dd0ef541a5", - "c245609de8ea43bb88c500e484638238", - "60fd5071bae7484f991e1fc6aca170de", - "de77ef90160f4d89a01e99fab514e0bb", - "ceb21ab3f6ef43c68642439fa948590c", - "141eb847045b48f69698e36bbff54e9e", - "0f598dd46d844505a64a85ccb22aa462", - "87d22f7577a94256aeca8146b6797101", - "a0c1f63835ea4700b5d59de386045f82", - "6b07d0ea362e442fb4a9f46fa66b8155", - "4d8991a173e5485a966fc51a0d58222d", - "d57b5e6e0017437db70929616ad041d7", - "9bbe6c83fe2f4bb6817e3177006af255", - "cbcb40306ff84710983dfc47b3ed0ad0", - "76bd90a147dd4d3da5b7bcb1bd759b9d", - "a3ee7025242c48aea17ce4935c467742", - "4267bf1283a04abda6e7877b43d04b52", - "0bdddb3424f44dbe97674c45488dd4eb", - "8330426f6ff140b6aba7190543688c2f", - "0fabe9fd86344ae1928752e6b4abc883", - "78a210fcb03a4450b34e73b7f4dc2280", - "ccb12bd8b80242c38f644fc73bc4edd5", - "8b647e9a5eb84dada7715f764258a7df", - "4398e38513ae40d5a0a9399c89fd60a7", - "fee8560569fa4c8797ce9d25fe587bff", - "3ff7fe78226542c4adc84898179249a6", - "0c959e89a32e42d8885bb9168ab3036f", - "bc7a5be54a8e43fc92a49b7c812b8a48", - "f8af455ea779482cab33235f505fe5ef", - "1e2db34d262f41cabd288f1100fed025", - "d26d7c429d9a4e80aba1c7de6e7e64f0", - "558cf7b57e9c4f729d834ac6d5ef6bfe", - "4cf43d2ded134c06ab1df7509d226175", - "14e16de286304a94a75960fe47176237", - "c435a072016d4dc3b30e409f2410450e", - "b4295cdb89d14877b60eb45c913f7409", - "91db42f83e454d1baf20cc1e0af0e1e1", - "227f6358cc4a47b3b6da3001a0e3fdbf", - "cf4bae6c34b34d3abeaa1a77454f4097", - "834f4a6d80eb4c1a8a3cf6276e071553", - "c5d2f8d7eb8d40c8b976c43406ed0204", - 
"6848d43f27bc4ce195806fb51e741a6f", - "c182ee1819344032ad954b39d261e35e", - "b20264f18ecc4f89bdb2ad9ec3052d0f", - "0ab071ebac92435791261ebebca4d103", - "115636ffa31a4212a2bb7294733744f3", - "c8b243e465c74845b21ce64729bd7e34", - "09d7e4295da04cc68569f096f60dc0da", - "18a5275ac2864347ba605046e798f344", - "a193ba8df21d4434a83079faaace6a16", - "810c7730004b4d92adde4a835b9e8896", - "9c17c34bca294f6ab92dc2973ad831bd", - "9beabf1daab340ae82a12ae4ad44defd", - "0d52cd598ddb4f63ab68ebb3a1672936", - "a7378ef70c964373873ac13eb9188af7", - "532eeb99350e444fad7fe3f659e118cb", - "99f18945bfa4496badf1b6398eced269", - "26924bd271a34fd3b6bc4cd4ef7d0fc8", - "1069c57ab76349acb1df1e267836877b", - "0d5f036ec3d84483b88de5c312c24631", - "b35af0a4b0394d6ca66388187a12b9c9", - "fd08587944f34eb2a2e72d6ebdf11dc9", - "5f8560ee9dea418a85f693dd4f771a72", - "961a5e9e866f4a0aa772e0395d2c5cc4", - "9ba34593f7c449acacde80b276e47e5e", - "b227d4b242384297883d73e2ef4ad36b", - "2aa6f24c423a4c9fbae4c9e2f59cd1ef", - "13147575bc324653ad03fe2b764c0f30", - "622c32270e6548ed9514345c0c1450cb", - "15f0540100a44d1cada3d2f6a0e927d4", - "183376a937604feca5a8fe1e01e8d8e6", - "e192d2a4b79b45ff858d90751fc7ebea", - "2693cf39ba874fff92e1b5294e796ed7", - "a070c177e8f94802a073d9be6a3226b6", - "984117efb8c548ccb2debd825d89d156" + "f966ff197d7147688f48326f68d08cac", + "eb8046d4485748a1ace93506f560f114", + "1147ef8de9ae47a098f1c369880720dd", + "1353cebec98241ce9f5e7c700c3e58fd", + "f8aad3b7486948489bd304ef8da5e3c5", + "e1dc1f8d581a4204aca2239e08045a15", + "3c21d198cbe14899b40398ac27f1ab77", + "3afcc81fbcf14f45a16c574ef11cb106", + "2d08b11a9ba04abbb662bf8f7c2e21c4", + "c7479f174879421c86f01a13b5f80bf4", + "b2ec1015b8f448c698f7b6e427bf4cea", + "18760a4be43c42089887daf6fc311a0b", + "190d3143f30e481984b35f159712a7b9", + "2f3843ac77fa463a99e7d205e0249730", + "bdea1ad25a054c9c9ef5341ef4452926", + "cd0cd1f71b9d42078da17e373af328cf", + "546384f6947040dfad88f3c56974fc17", + "5afa2b501e0947cd81e2e16aa3482643", + 
"004f0664294e4090b753d92bbbc8e749", + "4958d65528a844608ea6e6be77787f4c", + "144d3a9883ac4d8086d2c018b1779f8d", + "4780eed53cbf49f4a7d0062e301f7414", + "b8d2085d254a4f8881287ad4e9b9ba63", + "4350660466954ab985d7c2b5c2d50dfb", + "bec06e5cb6b245d2b1b01dfe5444109f", + "ae62f3c097754596ac8f7fac5cc1a619", + "f8587c86967343159b70520ca0e545d0", + "d972ef0e134d469b8c8cb279e07b7c20", + "cc9c4fef61984633a8d259dc3130230d", + "40d840d499ff42b98c178dad6498ce94", + "2e9e79856a724a35b8b2a5f7d255b68d", + "fab1c206f95e40a7b5404d46a1e36675", + "55faf3f1a3c24d70be75a9f60f74ee56", + "ea5a2db561964901958dabafbb52ad3e", + "db45b8f6aa3c41e7a507fe2a4311e4d3", + "6196383847cd455a8c1b3d64dd64f374", + "710a214347d34ef689b2cf79d5f32a66", + "f71910c3ac824cd59f0b07a2c2ff0de8", + "e5a2126f30f443e6815db58bf215c67b", + "3dc1b67aa00b40b696da039eff4c0780", + "c4b169eb0424470ba9fb64cafb547cdd", + "beced8d9f1404627833bc549b34d78e7", + "7adeb43a7c134edfa73aa5b9f47b1382", + "69a325bec0a647a5b76fd8480392a581" ] }, - "id": "DO8LihEaL9Es", - "outputId": "05761d16-36e5-4278-c36b-f883b46a39fd" + "id": "JeOvejs-LSh5", + "outputId": "a3e13639-478e-4aef-a8ef-0ef021e328b3" }, - "execution_count": null, "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "modules.json: 0%| | 0.00/229 [00:00, ?B/s]" - ], - "application/vnd.jupyter.widget-view+json": { - 
"version_major": 2, - "version_minor": 0, - "model_id": "3b0740b7c1184b11af19a2575c2028d7" - } - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "config_sentence_transformers.json: 0%| | 0.00/123 [00:00, ?B/s]" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "990877dd1448431ca676da862de76e66" - } - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "README.md: 0%| | 0.00/6.11k [00:00, ?B/s]" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "017b2cc0c5594dc6a4624b0bec38fe73" - } - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "sentence_bert_config.json: 0%| | 0.00/53.0 [00:00, ?B/s]" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "5d298e5197c94cb183ca668444f8ca81" - } - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "config.json: 0%| | 0.00/828 [00:00, ?B/s]" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "897fb40e1b0347a089aa4cb0fab582f3" - } - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "model.safetensors: 0%| | 0.00/669M [00:00, ?B/s]" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "6b07d0ea362e442fb4a9f46fa66b8155" - } - }, - "metadata": {} - }, { "output_type": "display_data", "data": { "text/plain": [ - "tokenizer_config.json: 0%| | 0.00/1.19k [00:00, ?B/s]" + "Downloading readme: 0%| | 0.00/3.73k [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "78a210fcb03a4450b34e73b7f4dc2280" + "model_id": "f966ff197d7147688f48326f68d08cac" } }, "metadata": {} @@ 
-689,12 +535,12 @@ "output_type": "display_data", "data": { "text/plain": [ - "vocab.txt: 0%| | 0.00/872k [00:00, ?B/s]" + "Downloading data: 0%| | 0.00/389k [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "558cf7b57e9c4f729d834ac6d5ef6bfe" + "model_id": "18760a4be43c42089887daf6fc311a0b" } }, "metadata": {} @@ -703,12 +549,12 @@ "output_type": "display_data", "data": { "text/plain": [ - "tokenizer.json: 0%| | 0.00/2.56M [00:00, ?B/s]" + "Generating train split: 0%| | 0/414 [00:00, ? examples/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "c182ee1819344032ad954b39d261e35e" + "model_id": "b8d2085d254a4f8881287ad4e9b9ba63" } }, "metadata": {} @@ -717,45 +563,30 @@ "output_type": "display_data", "data": { "text/plain": [ - "special_tokens_map.json: 0%| | 0.00/125 [00:00, ?B/s]" + "Saving the dataset (0/1 shards): 0%| | 0/414 [00:00, ? examples/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "0d52cd598ddb4f63ab68ebb3a1672936" + "model_id": "ea5a2db561964901958dabafbb52ad3e" } }, "metadata": {} }, { - "output_type": "display_data", + "output_type": "execute_result", "data": { "text/plain": [ - "1_Pooling/config.json: 0%| | 0.00/190 [00:00, ?B/s]" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "9ba34593f7c449acacde80b276e47e5e" - } + "Dataset({\n", + " features: ['instruction', 'output', 'input'],\n", + " num_rows: 414\n", + "})" + ] }, - "metadata": {} + "metadata": {}, + "execution_count": 6 } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Dataset download and embeddings generation\n", - "\n", - "The encode method transforms raw text into dense vector representations. 
This process involves tokenization, where text is split into subword units, followed by passing these tokens through the neural network layers of the SentenceTransformer model. The resulting embeddings capture semantic information in a high-dimensional space, where similar concepts are positioned closer together. The method likely uses batching to efficiently process large datasets and may employ techniques like length sorting to optimize padding and reduce computational waste." ], - "metadata": { - "id": "zHehvJOdstmv" - } - }, - { - "cell_type": "code", "source": [ "dataset = dataset_loader(\n", " name=\"louisbrulenaudet/dac6-instruct\",\n", @@ -765,87 +596,32 @@ "\n", "dataset.save_to_disk(\"dataset.hf\")\n", "dataset" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 117, - "referenced_widgets": [ - "6164d4034ebc4c9998b24eea7fcb6c9c", - "f2d186e9249c48739ad657d4fced59e1", - "a1d3611608a04262acef968c4bcf5cab", - "af433d70c63f4c98b7c7aa5805de545b", - "8ebc33062f62464fb07b85196fa51b83", - "7a04da2c8235414186ba778bcd747c55", - "13b1aae5e01c4a9da32c2652cb50cb33", - "849fbfab0ff94be89bc23b9f45956353", - "b846f7691776422598e50f2d9a6719cd", - "9b64e88626304a8797b23e5f9f69752c", - "6c8a85d410e44c34a1133610f43f1d2e" - ] - }, - "id": "JeOvejs-LSh5", - "outputId": "a35a3e5e-42ee-4526-fdf3-949ee9f60733" - }, - "execution_count": 18, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Saving the dataset (0/1 shards): 0%| | 0/414 [00:00, ? 
examples/s]" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "6164d4034ebc4c9998b24eea7fcb6c9c" - } - }, - "metadata": {} - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "Dataset({\n", - " features: ['instruction', 'output', 'input'],\n", - " num_rows: 414\n", - "})" - ] - }, - "metadata": {}, - "execution_count": 18 - } ] }, { "cell_type": "code", - "source": [ - "embeddings = instance.encode(corpus=dataset[\"output\"])\n", - "embeddings" - ], + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 270, + "height": 275, "referenced_widgets": [ - "e5c3429f78894b8088671a65198d35f0", - "2eac66b7087d4b1f86041f1fa49c8d92", - "86a3053d094b4853b3258cde202c6f42", - "6bd307756363414f832e8e5f23748cb2", - "0fd8f5d27312461192ccf8fbc2212c4b", - "181c9f9932e0483b8ca4fe4b13fe7ecb", - "7c091738255b4610ad4e8f20952748a8", - "6f84050fc7e046a9bb2738a84356d43d", - "244c4ae566a74163b0a2795374281256", - "39089e80fbaf495088f635bd15ab8bbe", - "73a79127b8b74dd3baba0802f729feea" + "1a150a4385c947d19bbcb642611d0f5a", + "79709e242e864d9ead8af5310e8f9f42", + "b8f1e466858a4d289a4a471e64c148ab", + "8e0ac888531944388c02c74785e56bea", + "073fcf6950e24490859da4d21437e010", + "8a1dacef7bea4f609477fae2875d7dbd", + "5a6471db3479484f86609b90a3cdb772", + "6c66014360cf439193e6ac0cecc9458d", + "b208eb7f196647cd8582151e595b5206", + "4a08ab2c64734cf98553f715d3a48f2b", + "0531d626edda4d9b8ee125044f72a81e" ] }, "id": "EpMP2Uv-VG0Z", - "outputId": "7ecd3bbc-eeaa-4463-9fe9-1b584dc0c1ec" + "outputId": "95463032-96a2-480a-d40e-cef12d1db7aa" }, - "execution_count": 14, "outputs": [ { "output_type": "display_data", @@ -856,7 +632,7 @@ "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "e5c3429f78894b8088671a65198d35f0" + "model_id": "1a150a4385c947d19bbcb642611d0f5a" } }, "metadata": {} @@ -865,56 +641,52 @@ "output_type": 
"execute_result", "data": { "text/plain": [ - "array([[ 0.02508392, -0.00361205, -0.02245241, ..., 0.03226953,\n", - " -0.01613418, -0.04832081],\n", - " [-0.02329118, -0.02008231, 0.01489964, ..., 0.02344927,\n", - " 0.04810405, -0.02210646],\n", - " [ 0.00223175, -0.01829153, -0.02352205, ..., 0.01129541,\n", - " 0.00152115, -0.03072203],\n", + "array([[ 0.02508387, -0.00361211, -0.02245243, ..., 0.03226954,\n", + " -0.01613413, -0.04832084],\n", + " [-0.02329118, -0.02008241, 0.01489968, ..., 0.02344931,\n", + " 0.04810406, -0.02210645],\n", + " [ 0.00223175, -0.01829162, -0.02352204, ..., 0.01129548,\n", + " 0.00152115, -0.03072198],\n", " ...,\n", - " [ 0.01643743, -0.02181389, -0.01811864, ..., 0.03853112,\n", - " 0.00745914, -0.02401087],\n", - " [ 0.00709248, 0.009109 , -0.05994354, ..., 0.04204508,\n", - " -0.00673348, -0.01090278],\n", - " [-0.00633491, -0.01061832, 0.02608344, ..., 0.07235143,\n", - " 0.00243154, -0.01910995]], dtype=float32)" + " [ 0.01643737, -0.02181391, -0.01811867, ..., 0.03853118,\n", + " 0.00745909, -0.02401081],\n", + " [ 0.00709253, 0.00910895, -0.05994356, ..., 0.04204512,\n", + " -0.00673348, -0.0109027 ],\n", + " [-0.006335 , -0.01061835, 0.02608343, ..., 0.07235146,\n", + " 0.00243155, -0.01910998]], dtype=float32)" ] }, "metadata": {}, - "execution_count": 14 + "execution_count": 7 } + ], + "source": [ + "embeddings = instance.encode(corpus=dataset[\"output\"])\n", + "embeddings" ] }, { "cell_type": "markdown", + "metadata": { + "id": "O864pxFIt20G" + }, "source": [ "# Binary quantization\n", "\n", "Binary quantization is an extreme form of dimensionality reduction, where each dimension of the embedding is represented by a single bit. This process involves setting a threshold (often the median value for each dimension across the dataset) and encoding values above this threshold as 1 and below as 0. 
While this dramatically reduces memory usage (compressing each embedding to just 96 bytes for a 768-dimensional vector), it also results in a more significant loss of information compared to other quantization methods. However, it enables extremely fast similarity computations using hardware-accelerated bitwise operations." - ], - "metadata": { - "id": "O864pxFIt20G" - } + ] }, { "cell_type": "code", - "source": [ - "ubinary_embeddings = instance.quantize_embeddings(\n", - " embeddings=embeddings,\n", - " quantization_type=\"ubinary\"\n", - ")\n", - "\n", - "ubinary_embeddings" - ], + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 153 + "height": 455 }, "id": "jFHFF5eDVTs4", - "outputId": "10e70ec9-fc00-42e0-ec59-422fc597eef3" + "outputId": "aa906b0e-d583-4dcb-8731-d44d30c9531b" }, - "execution_count": null, "outputs": [ { "output_type": "execute_result", @@ -940,7 +712,7 @@ " display: none;\n", " }\n", " \n", - "
ndarray (414, 96)
array([[152, 86, 70, ..., 173, 112, 84],\n", + "ndarray (414, 96)array([[152, 86, 70, ..., 173, 112, 84],\n", " [ 42, 215, 109, ..., 13, 60, 198],\n", " [136, 151, 117, ..., 77, 208, 22],\n", " ...,\n", @@ -950,10 +722,10 @@ " (() => {\n", " const titles = ['show data', 'hide data'];\n", " let index = 0\n", - " document.querySelector('#id-5c68268b-af3f-49a0-ad21-654971335804 button').onclick = (e) => {\n", - " document.querySelector('#id-5c68268b-af3f-49a0-ad21-654971335804').classList.toggle('show_array');\n", + " document.querySelector('#id-cbca854f-e08d-4453-8ff7-3bb69d77de46 button').onclick = (e) => {\n", + " document.querySelector('#id-cbca854f-e08d-4453-8ff7-3bb69d77de46').classList.toggle('show_array');\n", " index = (++index) % 2;\n", - " document.querySelector('#id-5c68268b-af3f-49a0-ad21-654971335804 button').textContent = titles[index];\n", + " document.querySelector('#id-cbca854f-e08d-4453-8ff7-3bb69d77de46 button').textContent = titles[index];\n", " e.preventDefault();\n", " e.stopPropagation();\n", " }\n", @@ -964,37 +736,37 @@ "metadata": {}, "execution_count": 8 } + ], + "source": [ + "ubinary_embeddings = instance.quantize_embeddings(\n", + " embeddings=embeddings,\n", + " quantization_type=\"ubinary\"\n", + ")\n", + "\n", + "ubinary_embeddings" ] }, { "cell_type": "markdown", + "metadata": { + "id": "KSWoo96YVlpd" + }, "source": [ "# Quantizing embeddings to 8-bit integers\n", "\n", "Int8 quantization maps the continuous embedding values to a discrete set of 256 values represented by 8-bit integers. This process typically involves scaling the original values to fit within the int8 range (-128 to 127) and may use techniques like asymmetric quantization to preserve more information. While less extreme than binary quantization, int8 still offers substantial memory savings (reducing each dimension to 1 byte) while preserving more of the original information. 
This quantization enables efficient SIMD (Single Instruction, Multiple Data) operations on modern CPUs, significantly accelerating similarity computations." - ], - "metadata": { - "id": "KSWoo96YVlpd" - } + ] }, { "cell_type": "code", - "source": [ - "int8_embeddings = instance.quantize_embeddings(\n", - " embeddings=embeddings,\n", - " quantization_type=\"int8\"\n", - ")\n", - "\n", - "int8_embeddings" - ], + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SCEiWKlXVyAO", - "outputId": "cac70c19-8441-4872-9cf3-afdba953bbaa" + "outputId": "20f5d463-1874-4aad-811b-16150468dbab" }, - "execution_count": null, "outputs": [ { "output_type": "execute_result", @@ -1012,36 +784,37 @@ "metadata": {}, "execution_count": 9 } + ], + "source": [ + "int8_embeddings = instance.quantize_embeddings(\n", + " embeddings=embeddings,\n", + " quantization_type=\"int8\"\n", + ")\n", + "\n", + "int8_embeddings" ] }, { "cell_type": "markdown", + "metadata": { + "id": "fA8GqkFxk9AF" + }, "source": [ "# Creating a USEARCH index\n", "\n", "USEARCH is designed for high-performance approximate nearest neighbor search. The index creation process likely involves building a hierarchical structure, possibly a navigable small world (NSW) graph, which allows for efficient traversal during search operations. The use of int8 quantized embeddings enables USEARCH to leverage SIMD instructions for rapid distance calculations. The resulting index balances search speed and accuracy, allowing for fast retrieval with a controlled trade-off in precision." 
- ], - "metadata": { - "id": "fA8GqkFxk9AF" - } + ] }, { "cell_type": "code", - "source": [ - "instance.create_usearch_index(\n", - " int8_embeddings=int8_embeddings,\n", - " index_path=\"./usearch_int8.index\",\n", - " save=True\n", - ")" - ], + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Uyt4yUfCV3Jy", - "outputId": "29ba3af3-841d-4079-e7c6-bd8033221c88" + "outputId": "065fdf6b-b131-4fb8-b131-b1ad23956364" }, - "execution_count": 17, "outputs": [ { "output_type": "execute_result", @@ -1071,2935 +844,424 @@ ] }, "metadata": {}, - "execution_count": 17 + "execution_count": 10 } + ], + "source": [ + "instance.create_usearch_index(\n", + " int8_embeddings=int8_embeddings,\n", + " index_path=\"./usearch_int8.index\",\n", + " save=True\n", + ")" ] }, { "cell_type": "markdown", + "metadata": { + "id": "bu4tRZdnlDxe" + }, "source": [ "# Creating a FAISS index\n", "\n", "FAISS (Facebook AI Similarity Search) is a library that provides efficient similarity search and clustering of dense vectors. For binary vectors, FAISS typically uses specialized index structures like the BinaryFlat index. This index performs exhaustive search using Hamming distance, which can be computed extremely efficiently on modern hardware using XOR and bit count operations. The binary nature of the index allows for compact storage and very fast search operations, albeit with reduced granularity in similarity scores compared to float-based indices." 
- ], - "metadata": { - "id": "bu4tRZdnlDxe" - } + ] }, { "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "aT6qW9OZlDFZ" + }, + "outputs": [], "source": [ "instance.create_faiss_index(\n", " ubinary_embeddings=ubinary_embeddings,\n", " index_path=\"./faiss_ubinary.index\",\n", " save=True\n", ")" - ], - "metadata": { - "id": "aT6qW9OZlDFZ" - }, - "execution_count": 16, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "uhPo-7y4lJyZ" + }, "source": [ "# Performing a similarity search\n", "\n", "The search process combines the strengths of both USEARCH and FAISS indices. It likely first uses the binary FAISS index for a rapid initial filtering step, leveraging the efficiency of Hamming distance calculations. The top candidates from this step (increased by the rescore_multiplier for better recall) are then refined using the more precise int8 USEARCH index. This two-stage approach balances speed and accuracy, allowing for quick pruning of unlikely candidates followed by more accurate rescoring.\n", "\n", "The query is first encoded using the same model and quantization processes as the corpus. The rescore_multiplier of 4 means the initial retrieval fetches 40 candidates (4 * top_k), which are then reranked to produce the final top 10 results. This oversampling helps mitigate the potential loss of relevant results due to quantization approximations." 
- ], - "metadata": { - "id": "uhPo-7y4lJyZ" - } - }, - { - "cell_type": "code", - "source": [ - "top_k_scores, top_k_indices = instance.search(\n", - " query=\"Définir le rôle d'un intermédiaire concepteur conformément à l'article 1649 AE du Code général des Impôts.\",\n", - " top_k=10,\n", - " rescore_multiplier=4\n", - ")\n", - "print(top_k_scores, top_k_indices)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 86, - "referenced_widgets": [ - "1b7a1c758c9844f6bb3297f5e907b125", - "a99fdb97b94b42b1ae2ede146cc02512", - "ce8efeb255504bcd8797022b70c89ac0", - "769a7ab752aa41a297ffa878f3b717b6", - "902ca7262ca94a7180f522df6b9b54f9", - "5f85ba1a93d04dbda65acab723e60ba2", - "125128e3613f40feb8153e3dd9e3b3aa", - "c2d320a7c814446db7159feeb907b6a2", - "f5d20157226846fd983f11904cf9eecf", - "fb117e7bda4a445ba65b472a64971440", - "ffcdbc40630a4056a8929fb5a8a88cdd" - ] - }, - "id": "SjyvMgbJlIBn", - "outputId": "b24a2110-9631-4a20-a38f-cb72d137d698" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Batches: 0%| | 0/1 [00:00, ?it/s]" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "1b7a1c758c9844f6bb3297f5e907b125" - } - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[2647.0556676079723, 2148.418636998307, 2106.1046419678387, 1961.8936295222884, 1947.321377885688, 1864.701749966327, 1845.0419668774193, 1795.922021207407, 1774.0641026571975, 1706.500367158289] [342, 66, 359, 242, 369, 10, 33, 261, 181, 230]\n" - ] - } ] }, { "cell_type": "code", - "source": [ - "try:\n", - " dataframe = pl.from_arrow(dataset.data.table).with_row_index()\n", - "\n", - "except:\n", - " dataframe = pl.from_arrow(dataset.data.table).with_row_count(\n", - " name=\"index\"\n", - " )\n", - "\n", - "\n", - "scores_df = pl.DataFrame(\n", - " {\n", - " \"index\": top_k_indices,\n", - " \"score\": 
top_k_scores\n", - " }\n", - ").with_columns(\n", - " pl.col(\"index\").cast(pl.UInt32)\n", - ")\n", - "\n", - "search_results = dataframe.filter(\n", - " pl.col(\"index\").is_in(top_k_indices)\n", - ").join(\n", - " scores_df,\n", - " how=\"inner\",\n", - " on=\"index\"\n", - ")\n", - "\n", - "search_results" - ], - "metadata": { - "id": "m2hWxxDlWSx3" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Embeddings visualization" - ], - "metadata": { - "id": "ELYDV7R3V-Tx" - } - }, - { - "cell_type": "code", - "source": [ - "visualizer = EmbeddingsVisualizer(\n", - " index_path=\"./faiss_ubinary.index\",\n", - " dataset_path=\"./dataset.hf\"\n", - ")" - ], - "metadata": { - "id": "14nt56-u1UDX" - }, - "execution_count": 20, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "visualizer.visualize(\n", - " column=\"output\"\n", - " method=\"pca\",\n", - " save_html=True,\n", - " html_file_name=\"embedding_visualization.html\"\n", - ")" - ], - "metadata": { - "id": "-Q60-p1R2WwI" - }, - "execution_count": null, - "outputs": [] - } - ], - "metadata": { - "colab": { - "provenance": [], - "gpuType": "T4", - "authorship_tag": "ABX9TyMNiEWE/yG9aSWSlND8w7Z7", - "include_colab_link": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "3b0740b7c1184b11af19a2575c2028d7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ed036c7e98d34ca1b1fefb5f7d7da690", - 
"IPY_MODEL_041dcd84dd8047b8bb00dcf0a8605366", - "IPY_MODEL_6ffed718be9d47b08b0f319823627692" - ], - "layout": "IPY_MODEL_bd764b4c8d3a4218a51ac6cfa016b176" - } - }, - "ed036c7e98d34ca1b1fefb5f7d7da690": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_459b6bd8f7804c6fa2cfbbf3574fea81", - "placeholder": "", - "style": "IPY_MODEL_310db8d9db6e4b05b3340d9ac4e6a885", - "value": "modules.json: 100%" - } - }, - "041dcd84dd8047b8bb00dcf0a8605366": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_454e18ed66a543e88829d0eb1c7585dd", - "max": 229, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_341d5f623a4e4d97a0b02a8b1f8372fa", - "value": 229 - } - }, - "6ffed718be9d47b08b0f319823627692": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - 
"description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_bf1831cf76294173ad2f4506bab90601", - "placeholder": "", - "style": "IPY_MODEL_d840ecb454ce401b9781ca181204320f", - "value": " 229/229 [00:00<00:00, 11.3kB/s]" - } - }, - "bd764b4c8d3a4218a51ac6cfa016b176": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "459b6bd8f7804c6fa2cfbbf3574fea81": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - 
"display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "310db8d9db6e4b05b3340d9ac4e6a885": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "454e18ed66a543e88829d0eb1c7585dd": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "341d5f623a4e4d97a0b02a8b1f8372fa": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "bf1831cf76294173ad2f4506bab90601": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - 
"order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d840ecb454ce401b9781ca181204320f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "990877dd1448431ca676da862de76e66": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_215f503d83524f22b534383c76bb7aa5", - "IPY_MODEL_57d8c511f130440ab43cd26946d73484", - "IPY_MODEL_6ad57261fbcc4033bcf3c56e01f14099" - ], - "layout": "IPY_MODEL_9d590a1002364797a243818665f95cea" - } - }, - "215f503d83524f22b534383c76bb7aa5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_31967af398094a5b8003a78d7cdef06a", - "placeholder": "", - "style": "IPY_MODEL_ee1e1101bcae43f8b938b70758635e28", - "value": "config_sentence_transformers.json: 100%" 
- } - }, - "57d8c511f130440ab43cd26946d73484": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fcadc705920c4dc0a965eff5b1de8088", - "max": 123, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_9cedabb67be046b7b0dfb7ca89c8e299", - "value": 123 - } - }, - "6ad57261fbcc4033bcf3c56e01f14099": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_91962d7fbd814ae0b6a6e10072f621bc", - "placeholder": "", - "style": "IPY_MODEL_910e040f53c246048d85236b525d7e95", - "value": " 123/123 [00:00<00:00, 9.79kB/s]" - } - }, - "9d590a1002364797a243818665f95cea": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": 
null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "31967af398094a5b8003a78d7cdef06a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ee1e1101bcae43f8b938b70758635e28": { - "model_module": "@jupyter-widgets/controls", - 
"model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "fcadc705920c4dc0a965eff5b1de8088": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9cedabb67be046b7b0dfb7ca89c8e299": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - 
"_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "91962d7fbd814ae0b6a6e10072f621bc": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "910e040f53c246048d85236b525d7e95": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "017b2cc0c5594dc6a4624b0bec38fe73": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - 
"_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_476cac6399c142efa95e85a5b159dd20", - "IPY_MODEL_ce8b3efd425e43158a2b55f8e47b8d56", - "IPY_MODEL_1fe1c159c31a4f8dba9beb0c53e49bbc" - ], - "layout": "IPY_MODEL_f121aa8e2b4a41feb4e7966f1a697d6f" - } - }, - "476cac6399c142efa95e85a5b159dd20": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_66abd6f0b54c47548f3f8074d9af2af8", - "placeholder": "", - "style": "IPY_MODEL_8347c3e484fa43b785c2a1392d4bf1aa", - "value": "README.md: 100%" - } - }, - "ce8b3efd425e43158a2b55f8e47b8d56": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5a1a0dd9fd9f4f0195f8ead413dfd1a9", - "max": 6114, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ef52f1ca67554e29a478abcd91eae44f", - "value": 6114 - } - }, - "1fe1c159c31a4f8dba9beb0c53e49bbc": { - "model_module": "@jupyter-widgets/controls", - 
"model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2629efe6d2044c29a22d9736b71523f4", - "placeholder": "", - "style": "IPY_MODEL_2c0e040a88e948a2835e07dd3fac05f0", - "value": " 6.11k/6.11k [00:00<00:00, 302kB/s]" - } - }, - "f121aa8e2b4a41feb4e7966f1a697d6f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "66abd6f0b54c47548f3f8074d9af2af8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - 
"_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8347c3e484fa43b785c2a1392d4bf1aa": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5a1a0dd9fd9f4f0195f8ead413dfd1a9": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, 
- "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ef52f1ca67554e29a478abcd91eae44f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "2629efe6d2044c29a22d9736b71523f4": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": 
null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2c0e040a88e948a2835e07dd3fac05f0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5d298e5197c94cb183ca668444f8ca81": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_1f4ea0c70e9c4923b7de5e307c01fcd5", - "IPY_MODEL_8527969d2f9d4b8da907cb8d586c27eb", - "IPY_MODEL_a10b2bca915841479e634c16af092d00" - ], - "layout": "IPY_MODEL_da023e01ccb34cd6a0d63ac05c4caf0f" - } - }, - "1f4ea0c70e9c4923b7de5e307c01fcd5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_cc58183628524f24b2f1cbe66527cf03", - "placeholder": "", - "style": "IPY_MODEL_77a0828fab334bf280fde83577adbc36", - "value": "sentence_bert_config.json: 100%" - } - }, - "8527969d2f9d4b8da907cb8d586c27eb": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_874f00bd481e40c0802f3b6312d1e7c1", - "max": 53, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_aab5b68dd3ce4d019b582188871e937e", - "value": 53 - } - }, - "a10b2bca915841479e634c16af092d00": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f51d2b0311ad4fa895f1c7c5c2c2ecee", - "placeholder": "", - "style": "IPY_MODEL_910a5796ac244e74b3fcd189166a7279", - "value": " 53.0/53.0 [00:00<00:00, 3.83kB/s]" - } - }, - "da023e01ccb34cd6a0d63ac05c4caf0f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - 
"_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "cc58183628524f24b2f1cbe66527cf03": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - 
"min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "77a0828fab334bf280fde83577adbc36": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "874f00bd481e40c0802f3b6312d1e7c1": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "aab5b68dd3ce4d019b582188871e937e": { - 
"model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "f51d2b0311ad4fa895f1c7c5c2c2ecee": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "910a5796ac244e74b3fcd189166a7279": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "897fb40e1b0347a089aa4cb0fab582f3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_744c5ae12aac4e9faa4a4845af24e3e9", - "IPY_MODEL_c4616271c618426cbe1043dd0ef541a5", - "IPY_MODEL_c245609de8ea43bb88c500e484638238" - ], - "layout": "IPY_MODEL_60fd5071bae7484f991e1fc6aca170de" - } - }, - "744c5ae12aac4e9faa4a4845af24e3e9": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_de77ef90160f4d89a01e99fab514e0bb", - "placeholder": "", - "style": "IPY_MODEL_ceb21ab3f6ef43c68642439fa948590c", - "value": "config.json: 100%" - } - }, - "c4616271c618426cbe1043dd0ef541a5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - 
"description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_141eb847045b48f69698e36bbff54e9e", - "max": 828, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_0f598dd46d844505a64a85ccb22aa462", - "value": 828 - } - }, - "c245609de8ea43bb88c500e484638238": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_87d22f7577a94256aeca8146b6797101", - "placeholder": "", - "style": "IPY_MODEL_a0c1f63835ea4700b5d59de386045f82", - "value": " 828/828 [00:00<00:00, 63.3kB/s]" - } - }, - "60fd5071bae7484f991e1fc6aca170de": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": 
null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "de77ef90160f4d89a01e99fab514e0bb": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ceb21ab3f6ef43c68642439fa948590c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "141eb847045b48f69698e36bbff54e9e": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - 
"model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0f598dd46d844505a64a85ccb22aa462": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "87d22f7577a94256aeca8146b6797101": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - 
"_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a0c1f63835ea4700b5d59de386045f82": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "6b07d0ea362e442fb4a9f46fa66b8155": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_4d8991a173e5485a966fc51a0d58222d", - "IPY_MODEL_d57b5e6e0017437db70929616ad041d7", - "IPY_MODEL_9bbe6c83fe2f4bb6817e3177006af255" - ], - "layout": 
"IPY_MODEL_cbcb40306ff84710983dfc47b3ed0ad0" - } - }, - "4d8991a173e5485a966fc51a0d58222d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_76bd90a147dd4d3da5b7bcb1bd759b9d", - "placeholder": "", - "style": "IPY_MODEL_a3ee7025242c48aea17ce4935c467742", - "value": "model.safetensors: 100%" - } - }, - "d57b5e6e0017437db70929616ad041d7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4267bf1283a04abda6e7877b43d04b52", - "max": 669448040, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_0bdddb3424f44dbe97674c45488dd4eb", - "value": 669448040 - } - }, - "9bbe6c83fe2f4bb6817e3177006af255": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_8330426f6ff140b6aba7190543688c2f", - "placeholder": "", - "style": "IPY_MODEL_0fabe9fd86344ae1928752e6b4abc883", - "value": " 669M/669M [00:13<00:00, 48.8MB/s]" - } - }, - "cbcb40306ff84710983dfc47b3ed0ad0": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "76bd90a147dd4d3da5b7bcb1bd759b9d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - 
"grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a3ee7025242c48aea17ce4935c467742": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4267bf1283a04abda6e7877b43d04b52": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - 
"justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0bdddb3424f44dbe97674c45488dd4eb": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "8330426f6ff140b6aba7190543688c2f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": 
null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0fabe9fd86344ae1928752e6b4abc883": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "78a210fcb03a4450b34e73b7f4dc2280": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ccb12bd8b80242c38f644fc73bc4edd5", - "IPY_MODEL_8b647e9a5eb84dada7715f764258a7df", - "IPY_MODEL_4398e38513ae40d5a0a9399c89fd60a7" - ], - "layout": "IPY_MODEL_fee8560569fa4c8797ce9d25fe587bff" - } - }, - "ccb12bd8b80242c38f644fc73bc4edd5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3ff7fe78226542c4adc84898179249a6", - "placeholder": "", - "style": "IPY_MODEL_0c959e89a32e42d8885bb9168ab3036f", - "value": "tokenizer_config.json: 100%" - } - }, - "8b647e9a5eb84dada7715f764258a7df": { - 
"model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_bc7a5be54a8e43fc92a49b7c812b8a48", - "max": 1190, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_f8af455ea779482cab33235f505fe5ef", - "value": 1190 - } - }, - "4398e38513ae40d5a0a9399c89fd60a7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1e2db34d262f41cabd288f1100fed025", - "placeholder": "", - "style": "IPY_MODEL_d26d7c429d9a4e80aba1c7de6e7e64f0", - "value": " 1.19k/1.19k [00:00<00:00, 90.8kB/s]" - } - }, - "fee8560569fa4c8797ce9d25fe587bff": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - 
"grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3ff7fe78226542c4adc84898179249a6": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0c959e89a32e42d8885bb9168ab3036f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - 
"model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "bc7a5be54a8e43fc92a49b7c812b8a48": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f8af455ea779482cab33235f505fe5ef": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"StyleView", - "bar_color": null, - "description_width": "" - } - }, - "1e2db34d262f41cabd288f1100fed025": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d26d7c429d9a4e80aba1c7de6e7e64f0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "558cf7b57e9c4f729d834ac6d5ef6bfe": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_4cf43d2ded134c06ab1df7509d226175", - "IPY_MODEL_14e16de286304a94a75960fe47176237", - "IPY_MODEL_c435a072016d4dc3b30e409f2410450e" - ], - "layout": "IPY_MODEL_b4295cdb89d14877b60eb45c913f7409" - } - }, - "4cf43d2ded134c06ab1df7509d226175": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_91db42f83e454d1baf20cc1e0af0e1e1", - "placeholder": "", - "style": "IPY_MODEL_227f6358cc4a47b3b6da3001a0e3fdbf", - "value": "vocab.txt: 100%" - } - }, - "14e16de286304a94a75960fe47176237": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_cf4bae6c34b34d3abeaa1a77454f4097", - "max": 871891, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_834f4a6d80eb4c1a8a3cf6276e071553", - "value": 871891 - } - }, - "c435a072016d4dc3b30e409f2410450e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - 
"model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c5d2f8d7eb8d40c8b976c43406ed0204", - "placeholder": "", - "style": "IPY_MODEL_6848d43f27bc4ce195806fb51e741a6f", - "value": " 872k/872k [00:00<00:00, 3.24MB/s]" - } - }, - "b4295cdb89d14877b60eb45c913f7409": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "91db42f83e454d1baf20cc1e0af0e1e1": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": 
"@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "227f6358cc4a47b3b6da3001a0e3fdbf": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cf4bae6c34b34d3abeaa1a77454f4097": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": 
null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 104, + "referenced_widgets": [ + "a89620d6be294e82a8f62b5382833c8b", + "129bfeedabe14bde820e30761b4951a2", + "6cd900679d174fe7983912993139738f", + "b9c6e11024794547a8b872fa5e9d3ad5", + "70f6c23316174c3bb938e83311d653ca", + "39dbbe22ff9741bfa55991555e544d50", + "b496735e982d4d2e9185f0dfbf4a887e", + "a1ca7953c4474532a5b39c3bfe253b8b", + "a68d8a70e19244c4ac96527c231dccb3", + "0994ad851d0c4484b202785330c693ae", + "74ba15a0df444fe791c95794675cb877" + ] }, - "834f4a6d80eb4c1a8a3cf6276e071553": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + "id": "SjyvMgbJlIBn", + "outputId": "944f2c4e-405e-40bf-c5e6-377813f54775" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Batches: 0%| | 0/1 
[00:00, ?it/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "a89620d6be294e82a8f62b5382833c8b" + } + }, + "metadata": {} }, - "c5d2f8d7eb8d40c8b976c43406ed0204": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0.24455547332763672\n", + "[2647.0537278942975, 2148.418285801039, 2106.1033194174197, 1961.8931482231237, 1947.3211771028728, 1864.7016069728961, 1845.0410123013535, 1795.9215277871153, 1774.0633022991497, 1706.5003995042025] [342, 66, 359, 242, 369, 10, 33, 261, 181, 230]\n" + ] + } + ], + "source": [ + "start_time = time.time()\n", + "\n", + "top_k_scores, top_k_indices = instance.search(\n", + " query=\"Définir le rôle d'un intermédiaire concepteur conformément à l'article 1649 AE du Code 
général des Impôts.\",\n", + " top_k=10,\n", + " rescore_multiplier=4\n", + ")\n", + "\n", + "end_time = time.time()\n", + "time_taken = end_time - start_time\n", + "print(time_taken)\n", + "print(top_k_scores, top_k_indices)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 412 }, - "6848d43f27bc4ce195806fb51e741a6f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + "id": "m2hWxxDlWSx3", + "outputId": "0174e7c0-95a1-49b5-df03-5efabfd9478a" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "shape: (10, 5)\n", + "┌───────┬─────────────────────────┬─────────────────────────┬────────────────────────┬─────────────┐\n", + "│ index ┆ instruction ┆ output ┆ input ┆ score │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ u32 ┆ str ┆ str ┆ str ┆ f64 │\n", + "╞═══════╪═════════════════════════╪═════════════════════════╪════════════════════════╪═════════════╡\n", + "│ 342 ┆ Listez des exemples ┆ Des exemples d'acteurs ┆ ┆ 2647.053728 │\n", + "│ ┆ d'acteurs qu… ┆ non consi… ┆ ┆ │\n", + "│ 66 ┆ Expliquez les ┆ Conformément aux ┆ ┆ 2148.418286 │\n", + "│ ┆ circonstances dans… ┆ dispositions du… ┆ ┆ │\n", + "│ 359 ┆ Expliquez la ┆ Lorsqu'une personne ┆ Une personne physique ┆ 2106.103319 │\n", + "│ ┆ détermination de l'… ┆ physique, te… ┆ est salari… ┆ │\n", + "│ 242 ┆ Déterminez le critère ┆ Conformément à la ┆ ┆ 1961.893148 │\n", + "│ ┆ de priorit… ┆ directive 2018… ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 33 ┆ La notion ┆ Conformément à ┆ ┆ 1845.041012 │\n", + "│ ┆ 
d'intermédiaire est-el… ┆ l'article 1649 AE… ┆ ┆ │\n", + "│ 261 ┆ Exposition des ┆ L'article 344 G octies ┆ ┆ 1795.921528 │\n", + "│ ┆ obligations décla… ┆ A de l'an… ┆ ┆ │\n", + "│ 181 ┆ Exposez les rôles ┆ Conformément aux ┆ Une société de gestion ┆ 1774.063302 │\n", + "│ ┆ attribués aux … ┆ dispositions de… ┆ de portef… ┆ │\n", + "│ 230 ┆ Définissez clairement ┆ Conformément à ┆ La détention d’un plan ┆ 1706.5004 │\n", + "│ ┆ le concept… ┆ l'article 1649 AH… ┆ d’épargne… ┆ │\n", + "└───────┴─────────────────────────┴─────────────────────────┴────────────────────────┴─────────────┘" + ], + "text/html": [ + "\n", + "shape: (10, 5)" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "try:\n", + " dataframe = pl.from_arrow(dataset.data.table).with_row_index()\n", + "\n", + "except:\n", + " dataframe = pl.from_arrow(dataset.data.table).with_row_count(\n", + " name=\"index\"\n", + " )\n", + "\n", + "\n", + "scores_df = pl.DataFrame(\n", + " {\n", + " \"index\": top_k_indices,\n", + " \"score\": top_k_scores\n", + " }\n", + ").with_columns(\n", + " pl.col(\"index\").cast(pl.UInt32)\n", + ")\n", + "\n", + "search_results = dataframe.filter(\n", + " pl.col(\"index\").is_in(top_k_indices)\n", + ").join(\n", + " scores_df,\n", + " how=\"inner\",\n", + " on=\"index\"\n", + ")\n", + "\n", + "search_results" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ELYDV7R3V-Tx" + }, + "source": [ + "# Embeddings visualization" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "14nt56-u1UDX" + }, + "outputs": [], + "source": [ + "visualizer = EmbeddingsVisualizer(\n", + " index_path=\"./faiss_ubinary.index\",\n", + " dataset_path=\"./dataset.hf\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "-Q60-p1R2WwI", + "outputId": "e740ae20-10da-4625-8c7f-05211c108477" + }, + "outputs": [ + { + 
"output_type": "stream", + "name": "stderr", + "text": [ + "2024-08-13 03:38:18,592 - INFO - load_index took 0.0027 seconds to execute.\n", + "2024-08-13 03:38:18,592 - INFO - load_index took 0.0027 seconds to execute.\n", + "2024-08-13 03:38:18,592 - INFO - load_index took 0.0027 seconds to execute.\n", + "2024-08-13 03:38:18,592 - INFO - load_index took 0.0027 seconds to execute.\n", + "2024-08-13 03:38:18,592 - INFO - load_index took 0.0027 seconds to execute.\n", + "INFO:ragoon._logger:load_index took 0.0027 seconds to execute.\n", + "2024-08-13 03:38:18,600 - INFO - Memory Usage Report for 'load_index':\n", + "2024-08-13 03:38:18,600 - INFO - Memory Usage Report for 'load_index':\n", + "2024-08-13 03:38:18,600 - INFO - Memory Usage Report for 'load_index':\n", + "2024-08-13 03:38:18,600 - INFO - Memory Usage Report for 'load_index':\n", + "2024-08-13 03:38:18,600 - INFO - Memory Usage Report for 'load_index':\n", + "INFO:ragoon._logger:Memory Usage Report for 'load_index':\n", + "2024-08-13 03:38:18,608 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,608 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,608 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,608 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,608 - INFO - Memory Used: 0.00 MB\n", + "INFO:ragoon._logger: Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,634 - INFO - Expected dimension (bits): 768\n", + "2024-08-13 03:38:18,634 - INFO - Expected dimension (bits): 768\n", + "2024-08-13 03:38:18,634 - INFO - Expected dimension (bits): 768\n", + "2024-08-13 03:38:18,634 - INFO - Expected dimension (bits): 768\n", + "2024-08-13 03:38:18,634 - INFO - Expected dimension (bits): 768\n", + "INFO:ragoon._logger:Expected dimension (bits): 768\n", + "2024-08-13 03:38:18,656 - INFO - Index total vectors: 414\n", + "2024-08-13 03:38:18,656 - INFO - Index total vectors: 414\n", + "2024-08-13 03:38:18,656 - INFO - Index total vectors: 414\n", + "2024-08-13 03:38:18,656 - INFO - 
Index total vectors: 414\n", + "2024-08-13 03:38:18,656 - INFO - Index total vectors: 414\n", + "INFO:ragoon._logger:Index total vectors: 414\n", + "2024-08-13 03:38:18,662 - INFO - Index code size (bytes): 96\n", + "2024-08-13 03:38:18,662 - INFO - Index code size (bytes): 96\n", + "2024-08-13 03:38:18,662 - INFO - Index code size (bytes): 96\n", + "2024-08-13 03:38:18,662 - INFO - Index code size (bytes): 96\n", + "2024-08-13 03:38:18,662 - INFO - Index code size (bytes): 96\n", + "INFO:ragoon._logger:Index code size (bytes): 96\n", + "2024-08-13 03:38:18,668 - INFO - Initialized binary vectors array with shape: (414, 96)\n", + "2024-08-13 03:38:18,668 - INFO - Initialized binary vectors array with shape: (414, 96)\n", + "2024-08-13 03:38:18,668 - INFO - Initialized binary vectors array with shape: (414, 96)\n", + "2024-08-13 03:38:18,668 - INFO - Initialized binary vectors array with shape: (414, 96)\n", + "2024-08-13 03:38:18,668 - INFO - Initialized binary vectors array with shape: (414, 96)\n", + "INFO:ragoon._logger:Initialized binary vectors array with shape: (414, 96)\n", + "100%|██████████| 414/414 [00:00<00:00, 203616.54it/s]\n", + "2024-08-13 03:38:18,692 - INFO - extract_vectors took 0.0582 seconds to execute.\n", + "2024-08-13 03:38:18,692 - INFO - extract_vectors took 0.0582 seconds to execute.\n", + "2024-08-13 03:38:18,692 - INFO - extract_vectors took 0.0582 seconds to execute.\n", + "2024-08-13 03:38:18,692 - INFO - extract_vectors took 0.0582 seconds to execute.\n", + "2024-08-13 03:38:18,692 - INFO - extract_vectors took 0.0582 seconds to execute.\n", + "INFO:ragoon._logger:extract_vectors took 0.0582 seconds to execute.\n", + "2024-08-13 03:38:18,702 - INFO - Memory Usage Report for 'extract_vectors':\n", + "2024-08-13 03:38:18,702 - INFO - Memory Usage Report for 'extract_vectors':\n", + "2024-08-13 03:38:18,702 - INFO - Memory Usage Report for 'extract_vectors':\n", + "2024-08-13 03:38:18,702 - INFO - Memory Usage Report for 
'extract_vectors':\n", + "2024-08-13 03:38:18,702 - INFO - Memory Usage Report for 'extract_vectors':\n", + "INFO:ragoon._logger:Memory Usage Report for 'extract_vectors':\n", + "2024-08-13 03:38:18,713 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,713 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,713 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,713 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,713 - INFO - Memory Used: 0.00 MB\n", + "INFO:ragoon._logger: Memory Used: 0.00 MB\n", + "PCA: 100%|██████████| 4/4 [00:00<00:00, 41.43it/s]\n", + "2024-08-13 03:38:18,846 - INFO - reduce_dimensionality took 0.1168 seconds to execute.\n", + "2024-08-13 03:38:18,846 - INFO - reduce_dimensionality took 0.1168 seconds to execute.\n", + "2024-08-13 03:38:18,846 - INFO - reduce_dimensionality took 0.1168 seconds to execute.\n", + "2024-08-13 03:38:18,846 - INFO - reduce_dimensionality took 0.1168 seconds to execute.\n", + "2024-08-13 03:38:18,846 - INFO - reduce_dimensionality took 0.1168 seconds to execute.\n", + "INFO:ragoon._logger:reduce_dimensionality took 0.1168 seconds to execute.\n", + "2024-08-13 03:38:18,854 - INFO - Memory Usage Report for 'reduce_dimensionality':\n", + "2024-08-13 03:38:18,854 - INFO - Memory Usage Report for 'reduce_dimensionality':\n", + "2024-08-13 03:38:18,854 - INFO - Memory Usage Report for 'reduce_dimensionality':\n", + "2024-08-13 03:38:18,854 - INFO - Memory Usage Report for 'reduce_dimensionality':\n", + "2024-08-13 03:38:18,854 - INFO - Memory Usage Report for 'reduce_dimensionality':\n", + "INFO:ragoon._logger:Memory Usage Report for 'reduce_dimensionality':\n", + "2024-08-13 03:38:18,861 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,861 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,861 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,861 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,861 - INFO - Memory Used: 0.00 MB\n", + "INFO:ragoon._logger: Memory 
Used: 0.00 MB\n", + "100%|██████████| 104/104 [00:00<00:00, 788.92it/s]\n", + "2024-08-13 03:38:19,348 - INFO - create_plot took 0.4795 seconds to execute.\n", + "2024-08-13 03:38:19,348 - INFO - create_plot took 0.4795 seconds to execute.\n", + "2024-08-13 03:38:19,348 - INFO - create_plot took 0.4795 seconds to execute.\n", + "2024-08-13 03:38:19,348 - INFO - create_plot took 0.4795 seconds to execute.\n", + "2024-08-13 03:38:19,348 - INFO - create_plot took 0.4795 seconds to execute.\n", + "INFO:ragoon._logger:create_plot took 0.4795 seconds to execute.\n", + "2024-08-13 03:38:19,369 - INFO - Memory Usage Report for 'create_plot':\n", + "2024-08-13 03:38:19,369 - INFO - Memory Usage Report for 'create_plot':\n", + "2024-08-13 03:38:19,369 - INFO - Memory Usage Report for 'create_plot':\n", + "2024-08-13 03:38:19,369 - INFO - Memory Usage Report for 'create_plot':\n", + "2024-08-13 03:38:19,369 - INFO - Memory Usage Report for 'create_plot':\n", + "INFO:ragoon._logger:Memory Usage Report for 'create_plot':\n", + "2024-08-13 03:38:19,378 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:19,378 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:19,378 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:19,378 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:19,378 - INFO - Memory Used: 0.00 MB\n", + "INFO:ragoon._logger: Memory Used: 0.00 MB\n", + "2024-08-13 03:38:20,444 - INFO - Visualization saved as embedding_visualization.html\n", + "2024-08-13 03:38:20,444 - INFO - Visualization saved as embedding_visualization.html\n", + "2024-08-13 03:38:20,444 - INFO - Visualization saved as embedding_visualization.html\n", + "2024-08-13 03:38:20,444 - INFO - Visualization saved as embedding_visualization.html\n", + "2024-08-13 03:38:20,444 - INFO - Visualization saved as embedding_visualization.html\n", + "INFO:ragoon._logger:Visualization saved as embedding_visualization.html\n" + ] }, - "c182ee1819344032ad954b39d261e35e": { + { + "output_type": 
"display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
index instruction output input score u32 str str str f64 342 "Listez des exe… "Des exemples d… "" 2647.053728 66 "Expliquez les … "Conformément a… "" 2148.418286 359 "Expliquez la d… "Lorsqu'une per… "Une personne p… 2106.103319 242 "Déterminez le … "Conformément à… "" 1961.893148 369 "Décrivez l'obl… "Si un interméd… "L’intermédiair… 1947.321177 10 "Expliquer le p… "Si l'intermédi… "" 1864.701607 33 "La notion d'in… "Conformément à… "" 1845.041012 261 "Exposition des… "L'article 344 … "" 1795.921528 181 "Exposez les rô… "Conformément a… "Une société de… 1774.063302 230 "Définissez cla… "Conformément à… "La détention d… 1706.5004 \n", + "