diff --git a/notebooks/RAGoon_SimilaritySearch_cookbook.ipynb b/notebooks/RAGoon_SimilaritySearch_cookbook.ipynb index 24ba48e..f449a52 100644 --- a/notebooks/RAGoon_SimilaritySearch_cookbook.ipynb +++ b/notebooks/RAGoon_SimilaritySearch_cookbook.ipynb @@ -3,8 +3,7 @@ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "id": "view-in-github" }, "source": [ "\"Open" @@ -12,6 +11,9 @@ }, { "cell_type": "markdown", + "metadata": { + "id": "E1qMPnLpqcr3" + }, "source": [ "# RAGoon SimilaritySearch cookbook ⚡\n", "[![Python](https://img.shields.io/pypi/pyversions/tensorflow.svg)](https://badge.fury.io/py/tensorflow) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) ![Maintainer](https://img.shields.io/badge/maintainer-@louisbrulenaudet-blue)\n", @@ -45,13 +47,13 @@ "## Feedback\n", "If you have any feedback, please reach out at [louisbrulenaudet@icloud.com](mailto:louisbrulenaudet@icloud.com).\n", "\n" - ], - "metadata": { - "id": "E1qMPnLpqcr3" - } + ] }, { "cell_type": "markdown", + "metadata": { + "id": "-UbYh3VCrikh" + }, "source": [ "# Installation\n", "\n", @@ -66,20 +68,17 @@ "- `huggingface_hub`: Essential for interacting with Hugging Face’s model repository, enabling easy access to pre-trained models and datasets.\n", "\n", "These dependencies work together to empower RAGoon with advanced capabilities in natural language processing, machine learning, and web data processing, making it a versatile tool for developers and researchers in AI." 
- ], - "metadata": { - "id": "-UbYh3VCrikh" - } + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "u4Bq23-p34KP", - "outputId": "0f8edae4-fb9f-4faa-a7a9-b93a5ca0233b" + "outputId": "65f6250e-131d-4786-9d36-eee83f8af12d" }, "outputs": [ { @@ -87,7 +86,8 @@ "name": "stdout", "text": [ "Collecting ragoon\n", - " Downloading ragoon-0.0.8-py3-none-any.whl.metadata (7.7 kB)\n", + " Downloading ragoon-0.0.13-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: polars in /usr/local/lib/python3.10/dist-packages (0.20.2)\n", "Requirement already satisfied: beautifulsoup4==4.12.3 in /usr/local/lib/python3.10/dist-packages (from ragoon) (4.12.3)\n", "Collecting datasets==2.20.0 (from ragoon)\n", " Downloading datasets-2.20.0-py3-none-any.whl.metadata (19 kB)\n", @@ -129,7 +129,7 @@ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from ragoon) (4.42.4)\n", "Collecting tqdm==4.66.4 (from ragoon)\n", " Downloading tqdm-4.66.4-py3-none-any.whl.metadata (57 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.6/57.6 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.6/57.6 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting umap==0.1.1 (from ragoon)\n", " Downloading umap-0.1.1.tar.gz (3.2 kB)\n", " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", @@ -262,47 +262,47 @@ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.3.1->ragoon) (1.3.0)\n", "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth!=2.24.0,!=2.25.0,<3.0.0.dev0,>=1.32.0->google-api-python-client==2.126.0->ragoon) (0.6.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets==2.20.0->ragoon) (1.16.0)\n", - "Downloading ragoon-0.0.8-py3-none-any.whl (37 kB)\n", + "Downloading ragoon-0.0.13-py3-none-any.whl (38 kB)\n", "Downloading datasets-2.20.0-py3-none-any.whl (547 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.8/547.8 kB\u001b[0m \u001b[31m15.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.8/547.8 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m50.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m39.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading google_api_python_client-2.126.0-py2.py3-none-any.whl (12.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.6/12.6 MB\u001b[0m \u001b[31m73.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.6/12.6 MB\u001b[0m \u001b[31m40.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 
"\u001b[?25hDownloading groq-0.9.0-py3-none-any.whl (103 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.5/103.5 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.5/103.5 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading httpx-0.27.0-py3-none-any.whl (75 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading huggingface_hub-0.24.2-py3-none-any.whl (417 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m417.2/417.2 kB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m417.2/417.2 kB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading myst_parser-3.0.1-py3-none-any.whl (83 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.2/83.2 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.2/83.2 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading numpydoc-1.7.0-py3-none-any.whl (62 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m3.6 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading openai-1.37.1-py3-none-any.whl (337 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m337.0/337.0 kB\u001b[0m \u001b[31m22.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m337.0/337.0 kB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading plotly-5.23.0-py3-none-any.whl (17.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m36.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pydata_sphinx_theme-0.15.4-py3-none-any.whl (4.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m71.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m47.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pytest-8.3.2-py3-none-any.whl (341 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.8/341.8 kB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.8/341.8 kB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.4/13.4 MB\u001b[0m \u001b[31m68.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.4/13.4 MB\u001b[0m \u001b[31m47.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.1/227.1 kB\u001b[0m \u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.1/227.1 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading sphinx-7.4.7-py3-none-any.whl (3.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m70.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m68.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading sphinx_book_theme-1.1.3-py3-none-any.whl (430 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m430.1/430.1 kB\u001b[0m \u001b[31m27.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m430.1/430.1 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading tqdm-4.66.4-py3-none-any.whl (78 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.3/78.3 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.3/78.3 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading umap_learn-0.5.6-py3-none-any.whl (85 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.7/85.7 kB\u001b[0m \u001b[31m7.5 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.7/85.7 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading usearch-2.12.0-cp310-cp310-manylinux_2_28_x86_64.whl (1.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m54.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m53.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hUsing cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", "Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", "Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", @@ -315,32 +315,32 @@ "Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n", "Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", "Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m423.5 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m164.5 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading docutils-0.21.2-py3-none-any.whl (587 kB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m587.4/587.4 kB\u001b[0m \u001b[31m35.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m587.4/587.4 kB\u001b[0m \u001b[31m31.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading fsspec-2024.5.0-py3-none-any.whl (316 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m316.1/316.1 kB\u001b[0m \u001b[31m24.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m316.1/316.1 kB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.9/39.9 MB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.9/39.9 MB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pygments-2.18.0-py3-none-any.whl (1.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m58.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m34.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pynndescent-0.5.13-py3-none-any.whl (56 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.9/56.9 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.9/56.9 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 
"\u001b[?25hDownloading accessible_pygments-0.0.5-py3-none-any.whl (1.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m66.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m48.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading h11-0.14.0-py3-none-any.whl (58 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hUsing cached nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_x86_64.whl (19.7 MB)\n", "Building wheels for collected packages: overload, umap\n", " Building wheel for overload (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for overload: filename=overload-1.1-py3-none-any.whl size=5675 sha256=bd134871ea1dd33588cb0eb38faa5141ef6e5bf1581a2df164a784a16e4f7fee\n", + " Created wheel for overload: filename=overload-1.1-py3-none-any.whl size=5675 sha256=fc19f1374f24912b92374b22f9f5b3e729044ca044e7fff2217bc49eab1a5d89\n", " Stored in directory: /root/.cache/pip/wheels/c2/bd/04/b71278036f82f85e09d62b31d780f87df6f2a2dd378a185b3e\n", " Building wheel for umap (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for umap: filename=umap-0.1.1-py3-none-any.whl size=3542 sha256=64a33bfe9c627bd2873973ff4d15a11bd54edb51ca0ae728bb1e2e868e4cb9ff\n", + " Created wheel for umap: filename=umap-0.1.1-py3-none-any.whl size=3542 sha256=6a79b33adc6299e7cf3ef7d1567da3a10ce1694b726eada5ccc5435034949c7f\n", " Stored in directory: /root/.cache/pip/wheels/15/f1/28/53dcf7a309118ed35d810a5f9cb995217800f3f269ab5771cb\n", "Successfully built overload umap\n", "Installing collected packages: umap, overload, xxhash, tqdm, pytest, pygments, pyarrow, plotly, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, h11, fsspec, faiss-cpu, docutils, dill, usearch, sphinx, scikit-learn, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, huggingface-hub, httpcore, accessible-pygments, pynndescent, pydata-sphinx-theme, nvidia-cusolver-cu12, numpydoc, myst-parser, httpx, umap-learn, sphinx-book-theme, openai, groq, google-api-python-client, datasets, sentence-transformers, ragoon\n", @@ -393,7 +393,7 @@ "cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 17.0.0 which is incompatible.\n", "gcsfs 2024.6.1 requires fsspec==2024.6.1, but you have fsspec 2024.5.0 which is incompatible.\n", "ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 17.0.0 which is incompatible.\u001b[0m\u001b[31m\n", - 
"\u001b[0mSuccessfully installed accessible-pygments-0.0.5 datasets-2.20.0 dill-0.3.8 docutils-0.21.2 faiss-cpu-1.8.0 fsspec-2024.5.0 google-api-python-client-2.126.0 groq-0.9.0 h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 huggingface-hub-0.24.2 multiprocess-0.70.16 myst-parser-3.0.1 numpydoc-1.7.0 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.6.20 nvidia-nvtx-cu12-12.1.105 openai-1.37.1 overload-1.1 plotly-5.23.0 pyarrow-17.0.0 pydata-sphinx-theme-0.15.4 pygments-2.18.0 pynndescent-0.5.13 pytest-8.3.2 ragoon-0.0.8 scikit-learn-1.5.1 sentence-transformers-3.0.1 sphinx-7.4.7 sphinx-book-theme-1.1.3 tqdm-4.66.4 umap-0.1.1 umap-learn-0.5.6 usearch-2.12.0 xxhash-3.4.1\n" + "\u001b[0mSuccessfully installed accessible-pygments-0.0.5 datasets-2.20.0 dill-0.3.8 docutils-0.21.2 faiss-cpu-1.8.0 fsspec-2024.5.0 google-api-python-client-2.126.0 groq-0.9.0 h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 huggingface-hub-0.24.2 multiprocess-0.70.16 myst-parser-3.0.1 numpydoc-1.7.0 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.6.20 nvidia-nvtx-cu12-12.1.105 openai-1.37.1 overload-1.1 plotly-5.23.0 pyarrow-17.0.0 pydata-sphinx-theme-0.15.4 pygments-2.18.0 pynndescent-0.5.13 pytest-8.3.2 ragoon-0.0.13 scikit-learn-1.5.1 sentence-transformers-3.0.1 sphinx-7.4.7 sphinx-book-theme-1.1.3 tqdm-4.66.4 umap-0.1.1 umap-learn-0.5.6 usearch-2.12.0 xxhash-3.4.1\n" ] } ], @@ -403,14 +403,14 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 
2, "metadata": { "id": "r_zE8a8z4HMV" }, "outputs": [], "source": [ "import polars as pl\n", - "\n", + "import time\n", "from ragoon import (\n", " dataset_loader,\n", " SimilaritySearch,\n", @@ -420,19 +420,24 @@ }, { "cell_type": "markdown", + "metadata": { + "id": "Sb6QMUxtMC7x" + }, "source": [ "# Instance creation\n", "\n", "The `SimilaritySearch` class is instantiated with specific parameters to configure the embedding model and search infrastructure. The chosen model, `louisbrulenaudet/tsdae-lemone-mbert-base`, is likely a multilingual BERT model fine-tuned with TSDAE (Transformer-based Sequential Denoising Auto-Encoder) on a custom dataset. This model choice suggests a focus on multilingual capabilities and improved semantic representations.\n", "\n", "The `cuda` device specification leverages GPU acceleration, crucial for efficient processing of large datasets. The embedding dimension of `768` is typical for BERT-based models, representing a balance between expressiveness and computational efficiency. The `ip` (inner product) metric is selected for similarity comparisons, which is computationally faster than cosine similarity when vectors are normalized. The `i8` dtype indicates 8-bit integer quantization, a technique that significantly reduces memory usage and speeds up similarity search at the cost of a small accuracy trade-off." - ], - "metadata": { - "id": "Sb6QMUxtMC7x" - } + ] }, { "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "DO8LihEaL9Es" + }, + "outputs": [], "source": [ "instance = SimilaritySearch(\n", " model_name=\"louisbrulenaudet/tsdae-lemone-mbert-base\",\n", @@ -441,246 +446,87 @@ " metric=\"ip\",\n", " dtype=\"i8\"\n", ")" - ], + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zHehvJOdstmv" + }, + "source": [ + "## Dataset download and embeddings generation\n", + "\n", + "The encode method transforms raw text into dense vector representations. 
This process involves tokenization, where text is split into subword units, followed by passing these tokens through the neural network layers of the SentenceTransformer model. The resulting embeddings capture semantic information in a high-dimensional space, where similar concepts are positioned closer together. The method likely uses batching to efficiently process large datasets and may employ techniques like length sorting to optimize padding and reduce computational waste." + ] + }, + { + "cell_type": "code", + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 491, + "height": 214, "referenced_widgets": [ - "3b0740b7c1184b11af19a2575c2028d7", - "ed036c7e98d34ca1b1fefb5f7d7da690", - "041dcd84dd8047b8bb00dcf0a8605366", - "6ffed718be9d47b08b0f319823627692", - "bd764b4c8d3a4218a51ac6cfa016b176", - "459b6bd8f7804c6fa2cfbbf3574fea81", - "310db8d9db6e4b05b3340d9ac4e6a885", - "454e18ed66a543e88829d0eb1c7585dd", - "341d5f623a4e4d97a0b02a8b1f8372fa", - "bf1831cf76294173ad2f4506bab90601", - "d840ecb454ce401b9781ca181204320f", - "990877dd1448431ca676da862de76e66", - "215f503d83524f22b534383c76bb7aa5", - "57d8c511f130440ab43cd26946d73484", - "6ad57261fbcc4033bcf3c56e01f14099", - "9d590a1002364797a243818665f95cea", - "31967af398094a5b8003a78d7cdef06a", - "ee1e1101bcae43f8b938b70758635e28", - "fcadc705920c4dc0a965eff5b1de8088", - "9cedabb67be046b7b0dfb7ca89c8e299", - "91962d7fbd814ae0b6a6e10072f621bc", - "910e040f53c246048d85236b525d7e95", - "017b2cc0c5594dc6a4624b0bec38fe73", - "476cac6399c142efa95e85a5b159dd20", - "ce8b3efd425e43158a2b55f8e47b8d56", - "1fe1c159c31a4f8dba9beb0c53e49bbc", - "f121aa8e2b4a41feb4e7966f1a697d6f", - "66abd6f0b54c47548f3f8074d9af2af8", - "8347c3e484fa43b785c2a1392d4bf1aa", - "5a1a0dd9fd9f4f0195f8ead413dfd1a9", - "ef52f1ca67554e29a478abcd91eae44f", - "2629efe6d2044c29a22d9736b71523f4", - "2c0e040a88e948a2835e07dd3fac05f0", - "5d298e5197c94cb183ca668444f8ca81", - "1f4ea0c70e9c4923b7de5e307c01fcd5", - 
"8527969d2f9d4b8da907cb8d586c27eb", - "a10b2bca915841479e634c16af092d00", - "da023e01ccb34cd6a0d63ac05c4caf0f", - "cc58183628524f24b2f1cbe66527cf03", - "77a0828fab334bf280fde83577adbc36", - "874f00bd481e40c0802f3b6312d1e7c1", - "aab5b68dd3ce4d019b582188871e937e", - "f51d2b0311ad4fa895f1c7c5c2c2ecee", - "910a5796ac244e74b3fcd189166a7279", - "897fb40e1b0347a089aa4cb0fab582f3", - "744c5ae12aac4e9faa4a4845af24e3e9", - "c4616271c618426cbe1043dd0ef541a5", - "c245609de8ea43bb88c500e484638238", - "60fd5071bae7484f991e1fc6aca170de", - "de77ef90160f4d89a01e99fab514e0bb", - "ceb21ab3f6ef43c68642439fa948590c", - "141eb847045b48f69698e36bbff54e9e", - "0f598dd46d844505a64a85ccb22aa462", - "87d22f7577a94256aeca8146b6797101", - "a0c1f63835ea4700b5d59de386045f82", - "6b07d0ea362e442fb4a9f46fa66b8155", - "4d8991a173e5485a966fc51a0d58222d", - "d57b5e6e0017437db70929616ad041d7", - "9bbe6c83fe2f4bb6817e3177006af255", - "cbcb40306ff84710983dfc47b3ed0ad0", - "76bd90a147dd4d3da5b7bcb1bd759b9d", - "a3ee7025242c48aea17ce4935c467742", - "4267bf1283a04abda6e7877b43d04b52", - "0bdddb3424f44dbe97674c45488dd4eb", - "8330426f6ff140b6aba7190543688c2f", - "0fabe9fd86344ae1928752e6b4abc883", - "78a210fcb03a4450b34e73b7f4dc2280", - "ccb12bd8b80242c38f644fc73bc4edd5", - "8b647e9a5eb84dada7715f764258a7df", - "4398e38513ae40d5a0a9399c89fd60a7", - "fee8560569fa4c8797ce9d25fe587bff", - "3ff7fe78226542c4adc84898179249a6", - "0c959e89a32e42d8885bb9168ab3036f", - "bc7a5be54a8e43fc92a49b7c812b8a48", - "f8af455ea779482cab33235f505fe5ef", - "1e2db34d262f41cabd288f1100fed025", - "d26d7c429d9a4e80aba1c7de6e7e64f0", - "558cf7b57e9c4f729d834ac6d5ef6bfe", - "4cf43d2ded134c06ab1df7509d226175", - "14e16de286304a94a75960fe47176237", - "c435a072016d4dc3b30e409f2410450e", - "b4295cdb89d14877b60eb45c913f7409", - "91db42f83e454d1baf20cc1e0af0e1e1", - "227f6358cc4a47b3b6da3001a0e3fdbf", - "cf4bae6c34b34d3abeaa1a77454f4097", - "834f4a6d80eb4c1a8a3cf6276e071553", - "c5d2f8d7eb8d40c8b976c43406ed0204", - 
"6848d43f27bc4ce195806fb51e741a6f", - "c182ee1819344032ad954b39d261e35e", - "b20264f18ecc4f89bdb2ad9ec3052d0f", - "0ab071ebac92435791261ebebca4d103", - "115636ffa31a4212a2bb7294733744f3", - "c8b243e465c74845b21ce64729bd7e34", - "09d7e4295da04cc68569f096f60dc0da", - "18a5275ac2864347ba605046e798f344", - "a193ba8df21d4434a83079faaace6a16", - "810c7730004b4d92adde4a835b9e8896", - "9c17c34bca294f6ab92dc2973ad831bd", - "9beabf1daab340ae82a12ae4ad44defd", - "0d52cd598ddb4f63ab68ebb3a1672936", - "a7378ef70c964373873ac13eb9188af7", - "532eeb99350e444fad7fe3f659e118cb", - "99f18945bfa4496badf1b6398eced269", - "26924bd271a34fd3b6bc4cd4ef7d0fc8", - "1069c57ab76349acb1df1e267836877b", - "0d5f036ec3d84483b88de5c312c24631", - "b35af0a4b0394d6ca66388187a12b9c9", - "fd08587944f34eb2a2e72d6ebdf11dc9", - "5f8560ee9dea418a85f693dd4f771a72", - "961a5e9e866f4a0aa772e0395d2c5cc4", - "9ba34593f7c449acacde80b276e47e5e", - "b227d4b242384297883d73e2ef4ad36b", - "2aa6f24c423a4c9fbae4c9e2f59cd1ef", - "13147575bc324653ad03fe2b764c0f30", - "622c32270e6548ed9514345c0c1450cb", - "15f0540100a44d1cada3d2f6a0e927d4", - "183376a937604feca5a8fe1e01e8d8e6", - "e192d2a4b79b45ff858d90751fc7ebea", - "2693cf39ba874fff92e1b5294e796ed7", - "a070c177e8f94802a073d9be6a3226b6", - "984117efb8c548ccb2debd825d89d156" + "f966ff197d7147688f48326f68d08cac", + "eb8046d4485748a1ace93506f560f114", + "1147ef8de9ae47a098f1c369880720dd", + "1353cebec98241ce9f5e7c700c3e58fd", + "f8aad3b7486948489bd304ef8da5e3c5", + "e1dc1f8d581a4204aca2239e08045a15", + "3c21d198cbe14899b40398ac27f1ab77", + "3afcc81fbcf14f45a16c574ef11cb106", + "2d08b11a9ba04abbb662bf8f7c2e21c4", + "c7479f174879421c86f01a13b5f80bf4", + "b2ec1015b8f448c698f7b6e427bf4cea", + "18760a4be43c42089887daf6fc311a0b", + "190d3143f30e481984b35f159712a7b9", + "2f3843ac77fa463a99e7d205e0249730", + "bdea1ad25a054c9c9ef5341ef4452926", + "cd0cd1f71b9d42078da17e373af328cf", + "546384f6947040dfad88f3c56974fc17", + "5afa2b501e0947cd81e2e16aa3482643", + 
"004f0664294e4090b753d92bbbc8e749", + "4958d65528a844608ea6e6be77787f4c", + "144d3a9883ac4d8086d2c018b1779f8d", + "4780eed53cbf49f4a7d0062e301f7414", + "b8d2085d254a4f8881287ad4e9b9ba63", + "4350660466954ab985d7c2b5c2d50dfb", + "bec06e5cb6b245d2b1b01dfe5444109f", + "ae62f3c097754596ac8f7fac5cc1a619", + "f8587c86967343159b70520ca0e545d0", + "d972ef0e134d469b8c8cb279e07b7c20", + "cc9c4fef61984633a8d259dc3130230d", + "40d840d499ff42b98c178dad6498ce94", + "2e9e79856a724a35b8b2a5f7d255b68d", + "fab1c206f95e40a7b5404d46a1e36675", + "55faf3f1a3c24d70be75a9f60f74ee56", + "ea5a2db561964901958dabafbb52ad3e", + "db45b8f6aa3c41e7a507fe2a4311e4d3", + "6196383847cd455a8c1b3d64dd64f374", + "710a214347d34ef689b2cf79d5f32a66", + "f71910c3ac824cd59f0b07a2c2ff0de8", + "e5a2126f30f443e6815db58bf215c67b", + "3dc1b67aa00b40b696da039eff4c0780", + "c4b169eb0424470ba9fb64cafb547cdd", + "beced8d9f1404627833bc549b34d78e7", + "7adeb43a7c134edfa73aa5b9f47b1382", + "69a325bec0a647a5b76fd8480392a581" ] }, - "id": "DO8LihEaL9Es", - "outputId": "05761d16-36e5-4278-c36b-f883b46a39fd" + "id": "JeOvejs-LSh5", + "outputId": "a3e13639-478e-4aef-a8ef-0ef021e328b3" }, - "execution_count": null, "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "modules.json: 0%| | 0.00/229 [00:00\n", - "
ndarray (414, 96) 
array([[152,  86,  70, ..., 173, 112,  84],\n",
+              "      
ndarray (414, 96) 
array([[152,  86,  70, ..., 173, 112,  84],\n",
               "       [ 42, 215, 109, ...,  13,  60, 198],\n",
               "       [136, 151, 117, ...,  77, 208,  22],\n",
               "       ...,\n",
@@ -950,10 +722,10 @@
               "      (() => {\n",
               "      const titles = ['show data', 'hide data'];\n",
               "      let index = 0\n",
-              "      document.querySelector('#id-5c68268b-af3f-49a0-ad21-654971335804 button').onclick = (e) => {\n",
-              "        document.querySelector('#id-5c68268b-af3f-49a0-ad21-654971335804').classList.toggle('show_array');\n",
+              "      document.querySelector('#id-cbca854f-e08d-4453-8ff7-3bb69d77de46 button').onclick = (e) => {\n",
+              "        document.querySelector('#id-cbca854f-e08d-4453-8ff7-3bb69d77de46').classList.toggle('show_array');\n",
               "        index = (++index) % 2;\n",
-              "        document.querySelector('#id-5c68268b-af3f-49a0-ad21-654971335804 button').textContent = titles[index];\n",
+              "        document.querySelector('#id-cbca854f-e08d-4453-8ff7-3bb69d77de46 button').textContent = titles[index];\n",
               "        e.preventDefault();\n",
               "        e.stopPropagation();\n",
               "      }\n",
@@ -964,37 +736,37 @@
           "metadata": {},
           "execution_count": 8
         }
+      ],
+      "source": [
+        "ubinary_embeddings = instance.quantize_embeddings(\n",
+        "    embeddings=embeddings,\n",
+        "    quantization_type=\"ubinary\"\n",
+        ")\n",
+        "\n",
+        "ubinary_embeddings"
       ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "KSWoo96YVlpd"
+      },
       "source": [
         "# Quantizing embeddings to 8-bit integers\n",
         "\n",
         "Int8 quantization maps the continuous embedding values to a discrete set of 256 values represented by 8-bit integers. This process typically involves scaling the original values to fit within the int8 range (-128 to 127) and may use techniques like asymmetric quantization to preserve more information. Although less extreme than binary quantization, int8 still offers substantial memory savings (each dimension is stored in 1 byte) while preserving more of the original information. This quantization also enables efficient SIMD (Single Instruction, Multiple Data) operations on modern CPUs, significantly accelerating similarity computations."
-      ],
-      "metadata": {
-        "id": "KSWoo96YVlpd"
-      }
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "int8_embeddings = instance.quantize_embeddings(\n",
-        "    embeddings=embeddings,\n",
-        "    quantization_type=\"int8\"\n",
-        ")\n",
-        "\n",
-        "int8_embeddings"
-      ],
+      "execution_count": 9,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "SCEiWKlXVyAO",
-        "outputId": "cac70c19-8441-4872-9cf3-afdba953bbaa"
+        "outputId": "20f5d463-1874-4aad-811b-16150468dbab"
       },
-      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1012,36 +784,37 @@
           "metadata": {},
           "execution_count": 9
         }
+      ],
+      "source": [
+        "int8_embeddings = instance.quantize_embeddings(\n",
+        "    embeddings=embeddings,\n",
+        "    quantization_type=\"int8\"\n",
+        ")\n",
+        "\n",
+        "int8_embeddings"
       ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "fA8GqkFxk9AF"
+      },
       "source": [
         "# Creating a USEARCH index\n",
         "\n",
         "USEARCH is designed for high-performance approximate nearest neighbor search. Index creation builds a Hierarchical Navigable Small World (HNSW) graph, a layered proximity graph that allows for efficient traversal during search operations. The use of int8 quantized embeddings enables USEARCH to leverage SIMD instructions for rapid distance calculations. The resulting index balances search speed and accuracy, allowing for fast retrieval with a controlled trade-off in precision."
-      ],
-      "metadata": {
-        "id": "fA8GqkFxk9AF"
-      }
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "instance.create_usearch_index(\n",
-        "    int8_embeddings=int8_embeddings,\n",
-        "    index_path=\"./usearch_int8.index\",\n",
-        "    save=True\n",
-        ")"
-      ],
+      "execution_count": 10,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "Uyt4yUfCV3Jy",
-        "outputId": "29ba3af3-841d-4079-e7c6-bd8033221c88"
+        "outputId": "065fdf6b-b131-4fb8-b131-b1ad23956364"
       },
-      "execution_count": 17,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1071,2935 +844,424 @@
             ]
           },
           "metadata": {},
-          "execution_count": 17
+          "execution_count": 10
         }
+      ],
+      "source": [
+        "instance.create_usearch_index(\n",
+        "    int8_embeddings=int8_embeddings,\n",
+        "    index_path=\"./usearch_int8.index\",\n",
+        "    save=True\n",
+        ")"
       ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "bu4tRZdnlDxe"
+      },
       "source": [
         "# Creating a FAISS index\n",
         "\n",
         "FAISS (Facebook AI Similarity Search) is a library that provides efficient similarity search and clustering of dense vectors. For binary vectors, FAISS provides specialized index structures such as `IndexBinaryFlat`. This index performs exhaustive search using Hamming distance, which can be computed extremely efficiently on modern hardware using XOR and bit count (popcount) operations. The binary nature of the index allows for compact storage and very fast search operations, albeit with reduced granularity in similarity scores compared to float-based indices."
-      ],
-      "metadata": {
-        "id": "bu4tRZdnlDxe"
-      }
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": 11,
+      "metadata": {
+        "id": "aT6qW9OZlDFZ"
+      },
+      "outputs": [],
       "source": [
         "instance.create_faiss_index(\n",
         "    ubinary_embeddings=ubinary_embeddings,\n",
         "    index_path=\"./faiss_ubinary.index\",\n",
         "    save=True\n",
         ")"
-      ],
-      "metadata": {
-        "id": "aT6qW9OZlDFZ"
-      },
-      "execution_count": 16,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "uhPo-7y4lJyZ"
+      },
       "source": [
         "# Performing a similarity search\n",
         "\n",
         "The search process combines the strengths of both USEARCH and FAISS indices. It likely first uses the binary FAISS index for a rapid initial filtering step, leveraging the efficiency of Hamming distance calculations. The top candidates from this step (increased by the rescore_multiplier for better recall) are then refined using the more precise int8 USEARCH index. This two-stage approach balances speed and accuracy, allowing for quick pruning of unlikely candidates followed by more accurate rescoring.\n",
         "\n",
         "The query is first encoded using the same model and quantization processes as the corpus. The rescore_multiplier of 4 means the initial retrieval fetches 40 candidates (4 * top_k), which are then reranked to produce the final top 10 results. This oversampling helps mitigate the potential loss of relevant results due to quantization approximations."
-      ],
-      "metadata": {
-        "id": "uhPo-7y4lJyZ"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "top_k_scores, top_k_indices = instance.search(\n",
-        "    query=\"Définir le rôle d'un intermédiaire concepteur conformément à l'article 1649 AE du Code général des Impôts.\",\n",
-        "    top_k=10,\n",
-        "    rescore_multiplier=4\n",
-        ")\n",
-        "print(top_k_scores, top_k_indices)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 86,
-          "referenced_widgets": [
-            "1b7a1c758c9844f6bb3297f5e907b125",
-            "a99fdb97b94b42b1ae2ede146cc02512",
-            "ce8efeb255504bcd8797022b70c89ac0",
-            "769a7ab752aa41a297ffa878f3b717b6",
-            "902ca7262ca94a7180f522df6b9b54f9",
-            "5f85ba1a93d04dbda65acab723e60ba2",
-            "125128e3613f40feb8153e3dd9e3b3aa",
-            "c2d320a7c814446db7159feeb907b6a2",
-            "f5d20157226846fd983f11904cf9eecf",
-            "fb117e7bda4a445ba65b472a64971440",
-            "ffcdbc40630a4056a8929fb5a8a88cdd"
-          ]
-        },
-        "id": "SjyvMgbJlIBn",
-        "outputId": "b24a2110-9631-4a20-a38f-cb72d137d698"
-      },
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Batches:   0%|          | 0/1 [00:00\n",
+              "shape: (10, 5)
indexinstructionoutputinputscore
u32strstrstrf64
342"Listez des exe…"Des exemples d…""2647.053728
66"Expliquez les …"Conformément a…""2148.418286
359"Expliquez la d…"Lorsqu'une per…"Une personne p…2106.103319
242"Déterminez le …"Conformément à…""1961.893148
369"Décrivez l'obl…"Si un interméd…"L’intermédiair…1947.321177
10"Expliquer le p…"Si l'intermédi…""1864.701607
33"La notion d'in…"Conformément à…""1845.041012
261"Exposition des…"L'article 344 …""1795.921528
181"Exposez les rô…"Conformément a…"Une société de…1774.063302
230"Définissez cla…"Conformément à…"La détention d…1706.5004
" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "try:\n", + " dataframe = pl.from_arrow(dataset.data.table).with_row_index()\n", + "\n", + "except:\n", + " dataframe = pl.from_arrow(dataset.data.table).with_row_count(\n", + " name=\"index\"\n", + " )\n", + "\n", + "\n", + "scores_df = pl.DataFrame(\n", + " {\n", + " \"index\": top_k_indices,\n", + " \"score\": top_k_scores\n", + " }\n", + ").with_columns(\n", + " pl.col(\"index\").cast(pl.UInt32)\n", + ")\n", + "\n", + "search_results = dataframe.filter(\n", + " pl.col(\"index\").is_in(top_k_indices)\n", + ").join(\n", + " scores_df,\n", + " how=\"inner\",\n", + " on=\"index\"\n", + ")\n", + "\n", + "search_results" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ELYDV7R3V-Tx" + }, + "source": [ + "# Embeddings visualization" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "14nt56-u1UDX" + }, + "outputs": [], + "source": [ + "visualizer = EmbeddingsVisualizer(\n", + " index_path=\"./faiss_ubinary.index\",\n", + " dataset_path=\"./dataset.hf\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "-Q60-p1R2WwI", + "outputId": "e740ae20-10da-4625-8c7f-05211c108477" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "2024-08-13 03:38:18,592 - INFO - load_index took 0.0027 seconds to execute.\n", + "2024-08-13 03:38:18,592 - INFO - load_index took 0.0027 seconds to execute.\n", + "2024-08-13 03:38:18,592 - INFO - load_index took 0.0027 seconds to execute.\n", + "2024-08-13 03:38:18,592 - INFO - load_index took 0.0027 seconds to execute.\n", + "2024-08-13 03:38:18,592 - INFO - load_index took 0.0027 seconds to execute.\n", + "INFO:ragoon._logger:load_index took 0.0027 seconds to execute.\n", + "2024-08-13 03:38:18,600 - INFO - Memory Usage Report for 'load_index':\n", + "2024-08-13 
03:38:18,600 - INFO - Memory Usage Report for 'load_index':\n", + "2024-08-13 03:38:18,600 - INFO - Memory Usage Report for 'load_index':\n", + "2024-08-13 03:38:18,600 - INFO - Memory Usage Report for 'load_index':\n", + "2024-08-13 03:38:18,600 - INFO - Memory Usage Report for 'load_index':\n", + "INFO:ragoon._logger:Memory Usage Report for 'load_index':\n", + "2024-08-13 03:38:18,608 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,608 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,608 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,608 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,608 - INFO - Memory Used: 0.00 MB\n", + "INFO:ragoon._logger: Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,634 - INFO - Expected dimension (bits): 768\n", + "2024-08-13 03:38:18,634 - INFO - Expected dimension (bits): 768\n", + "2024-08-13 03:38:18,634 - INFO - Expected dimension (bits): 768\n", + "2024-08-13 03:38:18,634 - INFO - Expected dimension (bits): 768\n", + "2024-08-13 03:38:18,634 - INFO - Expected dimension (bits): 768\n", + "INFO:ragoon._logger:Expected dimension (bits): 768\n", + "2024-08-13 03:38:18,656 - INFO - Index total vectors: 414\n", + "2024-08-13 03:38:18,656 - INFO - Index total vectors: 414\n", + "2024-08-13 03:38:18,656 - INFO - Index total vectors: 414\n", + "2024-08-13 03:38:18,656 - INFO - Index total vectors: 414\n", + "2024-08-13 03:38:18,656 - INFO - Index total vectors: 414\n", + "INFO:ragoon._logger:Index total vectors: 414\n", + "2024-08-13 03:38:18,662 - INFO - Index code size (bytes): 96\n", + "2024-08-13 03:38:18,662 - INFO - Index code size (bytes): 96\n", + "2024-08-13 03:38:18,662 - INFO - Index code size (bytes): 96\n", + "2024-08-13 03:38:18,662 - INFO - Index code size (bytes): 96\n", + "2024-08-13 03:38:18,662 - INFO - Index code size (bytes): 96\n", + "INFO:ragoon._logger:Index code size (bytes): 96\n", + "2024-08-13 03:38:18,668 - INFO - Initialized binary vectors array with shape: (414, 96)\n", + 
"2024-08-13 03:38:18,668 - INFO - Initialized binary vectors array with shape: (414, 96)\n", + "2024-08-13 03:38:18,668 - INFO - Initialized binary vectors array with shape: (414, 96)\n", + "2024-08-13 03:38:18,668 - INFO - Initialized binary vectors array with shape: (414, 96)\n", + "2024-08-13 03:38:18,668 - INFO - Initialized binary vectors array with shape: (414, 96)\n", + "INFO:ragoon._logger:Initialized binary vectors array with shape: (414, 96)\n", + "100%|██████████| 414/414 [00:00<00:00, 203616.54it/s]\n", + "2024-08-13 03:38:18,692 - INFO - extract_vectors took 0.0582 seconds to execute.\n", + "2024-08-13 03:38:18,692 - INFO - extract_vectors took 0.0582 seconds to execute.\n", + "2024-08-13 03:38:18,692 - INFO - extract_vectors took 0.0582 seconds to execute.\n", + "2024-08-13 03:38:18,692 - INFO - extract_vectors took 0.0582 seconds to execute.\n", + "2024-08-13 03:38:18,692 - INFO - extract_vectors took 0.0582 seconds to execute.\n", + "INFO:ragoon._logger:extract_vectors took 0.0582 seconds to execute.\n", + "2024-08-13 03:38:18,702 - INFO - Memory Usage Report for 'extract_vectors':\n", + "2024-08-13 03:38:18,702 - INFO - Memory Usage Report for 'extract_vectors':\n", + "2024-08-13 03:38:18,702 - INFO - Memory Usage Report for 'extract_vectors':\n", + "2024-08-13 03:38:18,702 - INFO - Memory Usage Report for 'extract_vectors':\n", + "2024-08-13 03:38:18,702 - INFO - Memory Usage Report for 'extract_vectors':\n", + "INFO:ragoon._logger:Memory Usage Report for 'extract_vectors':\n", + "2024-08-13 03:38:18,713 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,713 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,713 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,713 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,713 - INFO - Memory Used: 0.00 MB\n", + "INFO:ragoon._logger: Memory Used: 0.00 MB\n", + "PCA: 100%|██████████| 4/4 [00:00<00:00, 41.43it/s]\n", + "2024-08-13 03:38:18,846 - INFO - reduce_dimensionality took 
0.1168 seconds to execute.\n", + "2024-08-13 03:38:18,846 - INFO - reduce_dimensionality took 0.1168 seconds to execute.\n", + "2024-08-13 03:38:18,846 - INFO - reduce_dimensionality took 0.1168 seconds to execute.\n", + "2024-08-13 03:38:18,846 - INFO - reduce_dimensionality took 0.1168 seconds to execute.\n", + "2024-08-13 03:38:18,846 - INFO - reduce_dimensionality took 0.1168 seconds to execute.\n", + "INFO:ragoon._logger:reduce_dimensionality took 0.1168 seconds to execute.\n", + "2024-08-13 03:38:18,854 - INFO - Memory Usage Report for 'reduce_dimensionality':\n", + "2024-08-13 03:38:18,854 - INFO - Memory Usage Report for 'reduce_dimensionality':\n", + "2024-08-13 03:38:18,854 - INFO - Memory Usage Report for 'reduce_dimensionality':\n", + "2024-08-13 03:38:18,854 - INFO - Memory Usage Report for 'reduce_dimensionality':\n", + "2024-08-13 03:38:18,854 - INFO - Memory Usage Report for 'reduce_dimensionality':\n", + "INFO:ragoon._logger:Memory Usage Report for 'reduce_dimensionality':\n", + "2024-08-13 03:38:18,861 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,861 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,861 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,861 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:18,861 - INFO - Memory Used: 0.00 MB\n", + "INFO:ragoon._logger: Memory Used: 0.00 MB\n", + "100%|██████████| 104/104 [00:00<00:00, 788.92it/s]\n", + "2024-08-13 03:38:19,348 - INFO - create_plot took 0.4795 seconds to execute.\n", + "2024-08-13 03:38:19,348 - INFO - create_plot took 0.4795 seconds to execute.\n", + "2024-08-13 03:38:19,348 - INFO - create_plot took 0.4795 seconds to execute.\n", + "2024-08-13 03:38:19,348 - INFO - create_plot took 0.4795 seconds to execute.\n", + "2024-08-13 03:38:19,348 - INFO - create_plot took 0.4795 seconds to execute.\n", + "INFO:ragoon._logger:create_plot took 0.4795 seconds to execute.\n", + "2024-08-13 03:38:19,369 - INFO - Memory Usage Report for 'create_plot':\n", + 
"2024-08-13 03:38:19,369 - INFO - Memory Usage Report for 'create_plot':\n", + "2024-08-13 03:38:19,369 - INFO - Memory Usage Report for 'create_plot':\n", + "2024-08-13 03:38:19,369 - INFO - Memory Usage Report for 'create_plot':\n", + "2024-08-13 03:38:19,369 - INFO - Memory Usage Report for 'create_plot':\n", + "INFO:ragoon._logger:Memory Usage Report for 'create_plot':\n", + "2024-08-13 03:38:19,378 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:19,378 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:19,378 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:19,378 - INFO - Memory Used: 0.00 MB\n", + "2024-08-13 03:38:19,378 - INFO - Memory Used: 0.00 MB\n", + "INFO:ragoon._logger: Memory Used: 0.00 MB\n", + "2024-08-13 03:38:20,444 - INFO - Visualization saved as embedding_visualization.html\n", + "2024-08-13 03:38:20,444 - INFO - Visualization saved as embedding_visualization.html\n", + "2024-08-13 03:38:20,444 - INFO - Visualization saved as embedding_visualization.html\n", + "2024-08-13 03:38:20,444 - INFO - Visualization saved as embedding_visualization.html\n", + "2024-08-13 03:38:20,444 - INFO - Visualization saved as embedding_visualization.html\n", + "INFO:ragoon._logger:Visualization saved as embedding_visualization.html\n" + ] }, - "c182ee1819344032ad954b39d261e35e": { + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ], + "source": [ + "visualizer.visualize(\n", + " column=\"output\",\n", + " method=\"pca\",\n", + " save_html=True,\n", + " html_file_name=\"embedding_visualization.html\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d5DKrN3atvGA" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "f966ff197d7147688f48326f68d08cac": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", @@ -4014,14 +1276,14 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_b20264f18ecc4f89bdb2ad9ec3052d0f", - "IPY_MODEL_0ab071ebac92435791261ebebca4d103", - "IPY_MODEL_115636ffa31a4212a2bb7294733744f3" + "IPY_MODEL_eb8046d4485748a1ace93506f560f114", + "IPY_MODEL_1147ef8de9ae47a098f1c369880720dd", + "IPY_MODEL_1353cebec98241ce9f5e7c700c3e58fd" ], - "layout": "IPY_MODEL_c8b243e465c74845b21ce64729bd7e34" + "layout": "IPY_MODEL_f8aad3b7486948489bd304ef8da5e3c5" } }, - "b20264f18ecc4f89bdb2ad9ec3052d0f": { + "eb8046d4485748a1ace93506f560f114": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -4036,13 +1298,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_09d7e4295da04cc68569f096f60dc0da", + "layout": "IPY_MODEL_e1dc1f8d581a4204aca2239e08045a15", "placeholder": "​", - "style": "IPY_MODEL_18a5275ac2864347ba605046e798f344", - "value": "tokenizer.json: 100%" + "style": "IPY_MODEL_3c21d198cbe14899b40398ac27f1ab77", + "value": "Downloading readme: 100%" } }, - "0ab071ebac92435791261ebebca4d103": { + "1147ef8de9ae47a098f1c369880720dd": { "model_module": "@jupyter-widgets/controls", "model_name": 
"FloatProgressModel", "model_module_version": "1.5.0", @@ -4058,15 +1320,15 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_a193ba8df21d4434a83079faaace6a16", - "max": 2563623, + "layout": "IPY_MODEL_3afcc81fbcf14f45a16c574ef11cb106", + "max": 3732, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_810c7730004b4d92adde4a835b9e8896", - "value": 2563623 + "style": "IPY_MODEL_2d08b11a9ba04abbb662bf8f7c2e21c4", + "value": 3732 } }, - "115636ffa31a4212a2bb7294733744f3": { + "1353cebec98241ce9f5e7c700c3e58fd": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -4081,13 +1343,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_9c17c34bca294f6ab92dc2973ad831bd", + "layout": "IPY_MODEL_c7479f174879421c86f01a13b5f80bf4", "placeholder": "​", - "style": "IPY_MODEL_9beabf1daab340ae82a12ae4ad44defd", - "value": " 2.56M/2.56M [00:00<00:00, 9.70MB/s]" + "style": "IPY_MODEL_b2ec1015b8f448c698f7b6e427bf4cea", + "value": " 3.73k/3.73k [00:00<00:00, 103kB/s]" } }, - "c8b243e465c74845b21ce64729bd7e34": { + "f8aad3b7486948489bd304ef8da5e3c5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -4139,7 +1401,7 @@ "width": null } }, - "09d7e4295da04cc68569f096f60dc0da": { + "e1dc1f8d581a4204aca2239e08045a15": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -4191,7 +1453,7 @@ "width": null } }, - "18a5275ac2864347ba605046e798f344": { + "3c21d198cbe14899b40398ac27f1ab77": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -4206,7 +1468,7 @@ "description_width": "" } }, - "a193ba8df21d4434a83079faaace6a16": { + "3afcc81fbcf14f45a16c574ef11cb106": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": 
"1.2.0", @@ -4258,7 +1520,7 @@ "width": null } }, - "810c7730004b4d92adde4a835b9e8896": { + "2d08b11a9ba04abbb662bf8f7c2e21c4": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", @@ -4274,7 +1536,7 @@ "description_width": "" } }, - "9c17c34bca294f6ab92dc2973ad831bd": { + "c7479f174879421c86f01a13b5f80bf4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -4326,7 +1588,7 @@ "width": null } }, - "9beabf1daab340ae82a12ae4ad44defd": { + "b2ec1015b8f448c698f7b6e427bf4cea": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -4341,7 +1603,7 @@ "description_width": "" } }, - "0d52cd598ddb4f63ab68ebb3a1672936": { + "18760a4be43c42089887daf6fc311a0b": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", @@ -4356,14 +1618,14 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_a7378ef70c964373873ac13eb9188af7", - "IPY_MODEL_532eeb99350e444fad7fe3f659e118cb", - "IPY_MODEL_99f18945bfa4496badf1b6398eced269" + "IPY_MODEL_190d3143f30e481984b35f159712a7b9", + "IPY_MODEL_2f3843ac77fa463a99e7d205e0249730", + "IPY_MODEL_bdea1ad25a054c9c9ef5341ef4452926" ], - "layout": "IPY_MODEL_26924bd271a34fd3b6bc4cd4ef7d0fc8" + "layout": "IPY_MODEL_cd0cd1f71b9d42078da17e373af328cf" } }, - "a7378ef70c964373873ac13eb9188af7": { + "190d3143f30e481984b35f159712a7b9": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -4378,13 +1640,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_1069c57ab76349acb1df1e267836877b", + "layout": "IPY_MODEL_546384f6947040dfad88f3c56974fc17", "placeholder": "​", - "style": "IPY_MODEL_0d5f036ec3d84483b88de5c312c24631", - "value": "special_tokens_map.json: 100%" + "style": 
"IPY_MODEL_5afa2b501e0947cd81e2e16aa3482643", + "value": "Downloading data: 100%" } }, - "532eeb99350e444fad7fe3f659e118cb": { + "2f3843ac77fa463a99e7d205e0249730": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", @@ -4400,15 +1662,15 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_b35af0a4b0394d6ca66388187a12b9c9", - "max": 125, + "layout": "IPY_MODEL_004f0664294e4090b753d92bbbc8e749", + "max": 389235, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_fd08587944f34eb2a2e72d6ebdf11dc9", - "value": 125 + "style": "IPY_MODEL_4958d65528a844608ea6e6be77787f4c", + "value": 389235 } }, - "99f18945bfa4496badf1b6398eced269": { + "bdea1ad25a054c9c9ef5341ef4452926": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -4423,13 +1685,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_5f8560ee9dea418a85f693dd4f771a72", + "layout": "IPY_MODEL_144d3a9883ac4d8086d2c018b1779f8d", "placeholder": "​", - "style": "IPY_MODEL_961a5e9e866f4a0aa772e0395d2c5cc4", - "value": " 125/125 [00:00<00:00, 5.41kB/s]" + "style": "IPY_MODEL_4780eed53cbf49f4a7d0062e301f7414", + "value": " 389k/389k [00:00<00:00, 3.27MB/s]" } }, - "26924bd271a34fd3b6bc4cd4ef7d0fc8": { + "cd0cd1f71b9d42078da17e373af328cf": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -4481,7 +1743,7 @@ "width": null } }, - "1069c57ab76349acb1df1e267836877b": { + "546384f6947040dfad88f3c56974fc17": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -4533,7 +1795,7 @@ "width": null } }, - "0d5f036ec3d84483b88de5c312c24631": { + "5afa2b501e0947cd81e2e16aa3482643": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ 
-4548,7 +1810,7 @@ "description_width": "" } }, - "b35af0a4b0394d6ca66388187a12b9c9": { + "004f0664294e4090b753d92bbbc8e749": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -4600,7 +1862,7 @@ "width": null } }, - "fd08587944f34eb2a2e72d6ebdf11dc9": { + "4958d65528a844608ea6e6be77787f4c": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", @@ -4616,7 +1878,7 @@ "description_width": "" } }, - "5f8560ee9dea418a85f693dd4f771a72": { + "144d3a9883ac4d8086d2c018b1779f8d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -4668,7 +1930,7 @@ "width": null } }, - "961a5e9e866f4a0aa772e0395d2c5cc4": { + "4780eed53cbf49f4a7d0062e301f7414": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -4683,7 +1945,7 @@ "description_width": "" } }, - "9ba34593f7c449acacde80b276e47e5e": { + "b8d2085d254a4f8881287ad4e9b9ba63": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", @@ -4698,14 +1960,14 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_b227d4b242384297883d73e2ef4ad36b", - "IPY_MODEL_2aa6f24c423a4c9fbae4c9e2f59cd1ef", - "IPY_MODEL_13147575bc324653ad03fe2b764c0f30" + "IPY_MODEL_4350660466954ab985d7c2b5c2d50dfb", + "IPY_MODEL_bec06e5cb6b245d2b1b01dfe5444109f", + "IPY_MODEL_ae62f3c097754596ac8f7fac5cc1a619" ], - "layout": "IPY_MODEL_622c32270e6548ed9514345c0c1450cb" + "layout": "IPY_MODEL_f8587c86967343159b70520ca0e545d0" } }, - "b227d4b242384297883d73e2ef4ad36b": { + "4350660466954ab985d7c2b5c2d50dfb": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -4720,13 +1982,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": 
"IPY_MODEL_15f0540100a44d1cada3d2f6a0e927d4", + "layout": "IPY_MODEL_d972ef0e134d469b8c8cb279e07b7c20", "placeholder": "​", - "style": "IPY_MODEL_183376a937604feca5a8fe1e01e8d8e6", - "value": "1_Pooling/config.json: 100%" + "style": "IPY_MODEL_cc9c4fef61984633a8d259dc3130230d", + "value": "Generating train split: 100%" } }, - "2aa6f24c423a4c9fbae4c9e2f59cd1ef": { + "bec06e5cb6b245d2b1b01dfe5444109f": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", @@ -4742,15 +2004,15 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_e192d2a4b79b45ff858d90751fc7ebea", - "max": 190, + "layout": "IPY_MODEL_40d840d499ff42b98c178dad6498ce94", + "max": 414, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_2693cf39ba874fff92e1b5294e796ed7", - "value": 190 + "style": "IPY_MODEL_2e9e79856a724a35b8b2a5f7d255b68d", + "value": 414 } }, - "13147575bc324653ad03fe2b764c0f30": { + "ae62f3c097754596ac8f7fac5cc1a619": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -4765,13 +2027,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_a070c177e8f94802a073d9be6a3226b6", + "layout": "IPY_MODEL_fab1c206f95e40a7b5404d46a1e36675", "placeholder": "​", - "style": "IPY_MODEL_984117efb8c548ccb2debd825d89d156", - "value": " 190/190 [00:00<00:00, 11.2kB/s]" + "style": "IPY_MODEL_55faf3f1a3c24d70be75a9f60f74ee56", + "value": " 414/414 [00:00<00:00, 2341.44 examples/s]" } }, - "622c32270e6548ed9514345c0c1450cb": { + "f8587c86967343159b70520ca0e545d0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -4823,7 +2085,7 @@ "width": null } }, - "15f0540100a44d1cada3d2f6a0e927d4": { + "d972ef0e134d469b8c8cb279e07b7c20": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ 
-4875,7 +2137,7 @@ "width": null } }, - "183376a937604feca5a8fe1e01e8d8e6": { + "cc9c4fef61984633a8d259dc3130230d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -4890,7 +2152,7 @@ "description_width": "" } }, - "e192d2a4b79b45ff858d90751fc7ebea": { + "40d840d499ff42b98c178dad6498ce94": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -4942,7 +2204,7 @@ "width": null } }, - "2693cf39ba874fff92e1b5294e796ed7": { + "2e9e79856a724a35b8b2a5f7d255b68d": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", @@ -4958,7 +2220,7 @@ "description_width": "" } }, - "a070c177e8f94802a073d9be6a3226b6": { + "fab1c206f95e40a7b5404d46a1e36675": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -5010,7 +2272,7 @@ "width": null } }, - "984117efb8c548ccb2debd825d89d156": { + "55faf3f1a3c24d70be75a9f60f74ee56": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -5025,7 +2287,7 @@ "description_width": "" } }, - "1b7a1c758c9844f6bb3297f5e907b125": { + "ea5a2db561964901958dabafbb52ad3e": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", @@ -5040,14 +2302,14 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_a99fdb97b94b42b1ae2ede146cc02512", - "IPY_MODEL_ce8efeb255504bcd8797022b70c89ac0", - "IPY_MODEL_769a7ab752aa41a297ffa878f3b717b6" + "IPY_MODEL_db45b8f6aa3c41e7a507fe2a4311e4d3", + "IPY_MODEL_6196383847cd455a8c1b3d64dd64f374", + "IPY_MODEL_710a214347d34ef689b2cf79d5f32a66" ], - "layout": "IPY_MODEL_902ca7262ca94a7180f522df6b9b54f9" + "layout": "IPY_MODEL_f71910c3ac824cd59f0b07a2c2ff0de8" } }, - "a99fdb97b94b42b1ae2ede146cc02512": { + "db45b8f6aa3c41e7a507fe2a4311e4d3": { "model_module": 
"@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -5062,13 +2324,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_5f85ba1a93d04dbda65acab723e60ba2", + "layout": "IPY_MODEL_e5a2126f30f443e6815db58bf215c67b", "placeholder": "​", - "style": "IPY_MODEL_125128e3613f40feb8153e3dd9e3b3aa", - "value": "Batches: 100%" + "style": "IPY_MODEL_3dc1b67aa00b40b696da039eff4c0780", + "value": "Saving the dataset (1/1 shards): 100%" } }, - "ce8efeb255504bcd8797022b70c89ac0": { + "6196383847cd455a8c1b3d64dd64f374": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", @@ -5084,15 +2346,15 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_c2d320a7c814446db7159feeb907b6a2", - "max": 1, + "layout": "IPY_MODEL_c4b169eb0424470ba9fb64cafb547cdd", + "max": 414, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_f5d20157226846fd983f11904cf9eecf", - "value": 1 + "style": "IPY_MODEL_beced8d9f1404627833bc549b34d78e7", + "value": 414 } }, - "769a7ab752aa41a297ffa878f3b717b6": { + "710a214347d34ef689b2cf79d5f32a66": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -5107,13 +2369,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_fb117e7bda4a445ba65b472a64971440", + "layout": "IPY_MODEL_7adeb43a7c134edfa73aa5b9f47b1382", "placeholder": "​", - "style": "IPY_MODEL_ffcdbc40630a4056a8929fb5a8a88cdd", - "value": " 1/1 [00:00<00:00, 31.84it/s]" + "style": "IPY_MODEL_69a325bec0a647a5b76fd8480392a581", + "value": " 414/414 [00:00<00:00, 7859.44 examples/s]" } }, - "902ca7262ca94a7180f522df6b9b54f9": { + "f71910c3ac824cd59f0b07a2c2ff0de8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -5165,7 +2427,7 @@ "width": null } }, 
- "5f85ba1a93d04dbda65acab723e60ba2": { + "e5a2126f30f443e6815db58bf215c67b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -5217,7 +2479,7 @@ "width": null } }, - "125128e3613f40feb8153e3dd9e3b3aa": { + "3dc1b67aa00b40b696da039eff4c0780": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -5232,7 +2494,7 @@ "description_width": "" } }, - "c2d320a7c814446db7159feeb907b6a2": { + "c4b169eb0424470ba9fb64cafb547cdd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -5284,7 +2546,7 @@ "width": null } }, - "f5d20157226846fd983f11904cf9eecf": { + "beced8d9f1404627833bc549b34d78e7": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", @@ -5300,7 +2562,7 @@ "description_width": "" } }, - "fb117e7bda4a445ba65b472a64971440": { + "7adeb43a7c134edfa73aa5b9f47b1382": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -5352,7 +2614,7 @@ "width": null } }, - "ffcdbc40630a4056a8929fb5a8a88cdd": { + "69a325bec0a647a5b76fd8480392a581": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -5367,7 +2629,7 @@ "description_width": "" } }, - "6164d4034ebc4c9998b24eea7fcb6c9c": { + "1a150a4385c947d19bbcb642611d0f5a": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", @@ -5382,14 +2644,14 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_f2d186e9249c48739ad657d4fced59e1", - "IPY_MODEL_a1d3611608a04262acef968c4bcf5cab", - "IPY_MODEL_af433d70c63f4c98b7c7aa5805de545b" + "IPY_MODEL_79709e242e864d9ead8af5310e8f9f42", + "IPY_MODEL_b8f1e466858a4d289a4a471e64c148ab", + "IPY_MODEL_8e0ac888531944388c02c74785e56bea" ], - "layout": 
"IPY_MODEL_8ebc33062f62464fb07b85196fa51b83" + "layout": "IPY_MODEL_073fcf6950e24490859da4d21437e010" } }, - "f2d186e9249c48739ad657d4fced59e1": { + "79709e242e864d9ead8af5310e8f9f42": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -5404,13 +2666,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_7a04da2c8235414186ba778bcd747c55", + "layout": "IPY_MODEL_8a1dacef7bea4f609477fae2875d7dbd", "placeholder": "​", - "style": "IPY_MODEL_13b1aae5e01c4a9da32c2652cb50cb33", - "value": "Saving the dataset (1/1 shards): 100%" + "style": "IPY_MODEL_5a6471db3479484f86609b90a3cdb772", + "value": "Batches: 100%" } }, - "a1d3611608a04262acef968c4bcf5cab": { + "b8f1e466858a4d289a4a471e64c148ab": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", @@ -5426,15 +2688,15 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_849fbfab0ff94be89bc23b9f45956353", - "max": 414, + "layout": "IPY_MODEL_6c66014360cf439193e6ac0cecc9458d", + "max": 13, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_b846f7691776422598e50f2d9a6719cd", - "value": 414 + "style": "IPY_MODEL_b208eb7f196647cd8582151e595b5206", + "value": 13 } }, - "af433d70c63f4c98b7c7aa5805de545b": { + "8e0ac888531944388c02c74785e56bea": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -5449,13 +2711,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_9b64e88626304a8797b23e5f9f69752c", + "layout": "IPY_MODEL_4a08ab2c64734cf98553f715d3a48f2b", "placeholder": "​", - "style": "IPY_MODEL_6c8a85d410e44c34a1133610f43f1d2e", - "value": " 414/414 [00:00<00:00, 11528.63 examples/s]" + "style": "IPY_MODEL_0531d626edda4d9b8ee125044f72a81e", + "value": " 13/13 [03:38<00:00, 10.95s/it]" } }, - 
"8ebc33062f62464fb07b85196fa51b83": { + "073fcf6950e24490859da4d21437e010": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -5507,7 +2769,7 @@ "width": null } }, - "7a04da2c8235414186ba778bcd747c55": { + "8a1dacef7bea4f609477fae2875d7dbd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -5559,7 +2821,7 @@ "width": null } }, - "13b1aae5e01c4a9da32c2652cb50cb33": { + "5a6471db3479484f86609b90a3cdb772": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -5574,7 +2836,7 @@ "description_width": "" } }, - "849fbfab0ff94be89bc23b9f45956353": { + "6c66014360cf439193e6ac0cecc9458d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -5626,7 +2888,7 @@ "width": null } }, - "b846f7691776422598e50f2d9a6719cd": { + "b208eb7f196647cd8582151e595b5206": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", @@ -5642,7 +2904,7 @@ "description_width": "" } }, - "9b64e88626304a8797b23e5f9f69752c": { + "4a08ab2c64734cf98553f715d3a48f2b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -5694,7 +2956,7 @@ "width": null } }, - "6c8a85d410e44c34a1133610f43f1d2e": { + "0531d626edda4d9b8ee125044f72a81e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -5709,7 +2971,7 @@ "description_width": "" } }, - "e5c3429f78894b8088671a65198d35f0": { + "a89620d6be294e82a8f62b5382833c8b": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", @@ -5724,14 +2986,14 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_2eac66b7087d4b1f86041f1fa49c8d92", - "IPY_MODEL_86a3053d094b4853b3258cde202c6f42", - 
"IPY_MODEL_6bd307756363414f832e8e5f23748cb2" + "IPY_MODEL_129bfeedabe14bde820e30761b4951a2", + "IPY_MODEL_6cd900679d174fe7983912993139738f", + "IPY_MODEL_b9c6e11024794547a8b872fa5e9d3ad5" ], - "layout": "IPY_MODEL_0fd8f5d27312461192ccf8fbc2212c4b" + "layout": "IPY_MODEL_70f6c23316174c3bb938e83311d653ca" } }, - "2eac66b7087d4b1f86041f1fa49c8d92": { + "129bfeedabe14bde820e30761b4951a2": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -5746,13 +3008,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_181c9f9932e0483b8ca4fe4b13fe7ecb", + "layout": "IPY_MODEL_39dbbe22ff9741bfa55991555e544d50", "placeholder": "​", - "style": "IPY_MODEL_7c091738255b4610ad4e8f20952748a8", + "style": "IPY_MODEL_b496735e982d4d2e9185f0dfbf4a887e", "value": "Batches: 100%" } }, - "86a3053d094b4853b3258cde202c6f42": { + "6cd900679d174fe7983912993139738f": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", @@ -5768,15 +3030,15 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_6f84050fc7e046a9bb2738a84356d43d", - "max": 13, + "layout": "IPY_MODEL_a1ca7953c4474532a5b39c3bfe253b8b", + "max": 1, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_244c4ae566a74163b0a2795374281256", - "value": 13 + "style": "IPY_MODEL_a68d8a70e19244c4ac96527c231dccb3", + "value": 1 } }, - "6bd307756363414f832e8e5f23748cb2": { + "b9c6e11024794547a8b872fa5e9d3ad5": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -5791,13 +3053,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_39089e80fbaf495088f635bd15ab8bbe", + "layout": "IPY_MODEL_0994ad851d0c4484b202785330c693ae", "placeholder": "​", - "style": "IPY_MODEL_73a79127b8b74dd3baba0802f729feea", - "value": " 13/13 [00:04<00:00,  
4.67it/s]" + "style": "IPY_MODEL_74ba15a0df444fe791c95794675cb877", + "value": " 1/1 [00:00<00:00,  4.64it/s]" } }, - "0fd8f5d27312461192ccf8fbc2212c4b": { + "70f6c23316174c3bb938e83311d653ca": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -5849,7 +3111,7 @@ "width": null } }, - "181c9f9932e0483b8ca4fe4b13fe7ecb": { + "39dbbe22ff9741bfa55991555e544d50": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -5901,7 +3163,7 @@ "width": null } }, - "7c091738255b4610ad4e8f20952748a8": { + "b496735e982d4d2e9185f0dfbf4a887e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -5916,7 +3178,7 @@ "description_width": "" } }, - "6f84050fc7e046a9bb2738a84356d43d": { + "a1ca7953c4474532a5b39c3bfe253b8b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -5968,7 +3230,7 @@ "width": null } }, - "244c4ae566a74163b0a2795374281256": { + "a68d8a70e19244c4ac96527c231dccb3": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", @@ -5984,7 +3246,7 @@ "description_width": "" } }, - "39089e80fbaf495088f635bd15ab8bbe": { + "0994ad851d0c4484b202785330c693ae": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -6036,7 +3298,7 @@ "width": null } }, - "73a79127b8b74dd3baba0802f729feea": { + "74ba15a0df444fe791c95794675cb877": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -6052,8 +3314,7 @@ } } } - }, - "accelerator": "GPU" + } }, "nbformat": 4, "nbformat_minor": 0