From 059cf341ec8b7fb19276b9e842807d95c3103040 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Krassowski?= <5832902+krassowski@users.noreply.github.com> Date: Wed, 20 Dec 2023 22:21:54 +0000 Subject: [PATCH] Backport PR #533: Document entry point and API for custom embedding models --- docs/source/developers/index.md | 31 ++++++++++++++++++- .../jupyter_ai_magics/__init__.py | 1 + 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/docs/source/developers/index.md b/docs/source/developers/index.md index ba3c969d7..d2759039e 100644 --- a/docs/source/developers/index.md +++ b/docs/source/developers/index.md @@ -87,6 +87,35 @@ your new provider's `id`: [LLM]: https://api.python.langchain.com/en/v0.0.339/llms/langchain.llms.base.LLM.html#langchain.llms.base.LLM [BaseChatModel]: https://api.python.langchain.com/en/v0.0.339/chat_models/langchain.chat_models.base.BaseChatModel.html +### Custom embeddings providers + +To provide a custom embeddings model, define an embeddings provider implementing the API of `jupyter-ai`'s `BaseEmbeddingsProvider` and of `langchain`'s [`Embeddings`][Embeddings] abstract class. + +```python +from jupyter_ai_magics import BaseEmbeddingsProvider +from langchain.embeddings import FakeEmbeddings + +class MyEmbeddingsProvider(BaseEmbeddingsProvider, FakeEmbeddings): + id = "my_embeddings_provider" + name = "My Embeddings Provider" + model_id_key = "model" + models = ["my_model"] + + def __init__(self, **kwargs): + super().__init__(size=300, **kwargs) +``` + +Jupyter AI uses entry points to discover embedding providers. 
+In the `pyproject.toml` file, add your custom embedding provider to the +`[project.entry-points."jupyter_ai.embeddings_model_providers"]` section: + +```toml +[project.entry-points."jupyter_ai.embeddings_model_providers"] +my-provider = "my_provider:MyEmbeddingsProvider" +``` + +[Embeddings]: https://api.python.langchain.com/en/stable/embeddings/langchain_core.embeddings.Embeddings.html + ## Prompt templates Each provider can define **prompt templates** for each supported format. A prompt @@ -155,7 +184,7 @@ Jupyter AI uses entry points to support custom slash commands. In the `pyproject.toml` file, add your custom handler to the `[project.entry-points."jupyter_ai.chat_handlers"]` section: -``` +```toml [project.entry-points."jupyter_ai.chat_handlers"] custom = "custom_package:CustomChatHandler" ``` diff --git a/packages/jupyter-ai-magics/jupyter_ai_magics/__init__.py b/packages/jupyter-ai-magics/jupyter_ai_magics/__init__.py index b991a49b9..cf808476a 100644 --- a/packages/jupyter-ai-magics/jupyter_ai_magics/__init__.py +++ b/packages/jupyter-ai-magics/jupyter_ai_magics/__init__.py @@ -2,6 +2,7 @@ # expose embedding model providers on the package root from .embedding_providers import ( + BaseEmbeddingsProvider, BedrockEmbeddingsProvider, CohereEmbeddingsProvider, GPT4AllEmbeddingsProvider,