diff --git a/.github/workflows/ollama.yml b/.github/workflows/ollama.yml index 962537390..055f63dd0 100644 --- a/.github/workflows/ollama.yml +++ b/.github/workflows/ollama.yml @@ -22,6 +22,7 @@ env: PYTHONUNBUFFERED: "1" FORCE_COLOR: "1" LLM_FOR_TESTS: "orca-mini" + EMBEDDER_FOR_TESTS: "nomic-embed-text" jobs: run: @@ -55,7 +56,10 @@ jobs: run: hatch run lint:all - name: Pull the LLM in the Ollama service - run: docker exec ollama ollama pull ${{ env.LLM_FOR_TESTS }} + run: docker exec ollama ollama pull ${{ env.LLM_FOR_TESTS }} + + - name: Pull the Embedding Model in the Ollama service + run: docker exec ollama ollama pull ${{ env.EMBEDDER_FOR_TESTS }} - name: Generate docs if: matrix.python-version == '3.9' && runner.os == 'Linux' diff --git a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py index 17b32f065..6e3273e1c 100644 --- a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py +++ b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py @@ -9,7 +9,7 @@ class OllamaDocumentEmbedder: def __init__( self, - model: str = "orca-mini", + model: str = "nomic-embed-text", url: str = "http://localhost:11434/api/embeddings", generation_kwargs: Optional[Dict[str, Any]] = None, timeout: int = 120, @@ -21,7 +21,7 @@ def __init__( ): """ :param model: The name of the model to use. The model should be available in the running Ollama instance. - Default is "orca-mini". + Default is "nomic-embed-text". See https://ollama.com/library/nomic-embed-text for details. :param url: The URL of the chat endpoint of a running Ollama instance. Default is "http://localhost:11434/api/embeddings". 
:param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature, diff --git a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py index e27fd9ff4..e2ef136b4 100644 --- a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py +++ b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py @@ -8,14 +8,14 @@ class OllamaTextEmbedder: def __init__( self, - model: str = "orca-mini", + model: str = "nomic-embed-text", url: str = "http://localhost:11434/api/embeddings", generation_kwargs: Optional[Dict[str, Any]] = None, timeout: int = 120, ): """ :param model: The name of the model to use. The model should be available in the running Ollama instance. - Default is "orca-mini". + Default is "nomic-embed-text". See https://ollama.com/library/nomic-embed-text for details. :param url: The URL of the chat endpoint of a running Ollama instance. Default is "http://localhost:11434/api/embeddings". 
:param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature, diff --git a/integrations/ollama/tests/test_document_embedder.py b/integrations/ollama/tests/test_document_embedder.py index a5694db33..012ad9eae 100644 --- a/integrations/ollama/tests/test_document_embedder.py +++ b/integrations/ollama/tests/test_document_embedder.py @@ -11,11 +11,11 @@ def test_init_defaults(self): assert embedder.timeout == 120 assert embedder.generation_kwargs == {} assert embedder.url == "http://localhost:11434/api/embeddings" - assert embedder.model == "orca-mini" + assert embedder.model == "nomic-embed-text" def test_init(self): embedder = OllamaDocumentEmbedder( - model="orca-mini", + model="nomic-embed-text", url="http://my-custom-endpoint:11434/api/embeddings", generation_kwargs={"temperature": 0.5}, timeout=3000, @@ -24,7 +24,7 @@ def test_init(self): assert embedder.timeout == 3000 assert embedder.generation_kwargs == {"temperature": 0.5} assert embedder.url == "http://my-custom-endpoint:11434/api/embeddings" - assert embedder.model == "orca-mini" + assert embedder.model == "nomic-embed-text" @pytest.mark.integration def test_model_not_found(self): @@ -35,17 +35,17 @@ def test_model_not_found(self): @pytest.mark.integration def import_text_in_embedder(self): - embedder = OllamaDocumentEmbedder(model="orca-mini") + embedder = OllamaDocumentEmbedder(model="nomic-embed-text") with pytest.raises(TypeError): embedder.run("This is a text string. 
This should not work.") @pytest.mark.integration def test_run(self): - embedder = OllamaDocumentEmbedder(model="orca-mini") + embedder = OllamaDocumentEmbedder(model="nomic-embed-text") list_of_docs = [Document(content="This is a document containing some text.")] reply = embedder.run(list_of_docs) assert isinstance(reply, dict) assert all(isinstance(element, float) for element in reply["documents"][0].embedding) - assert reply["meta"]["model"] == "orca-mini" + assert reply["meta"]["model"] == "nomic-embed-text" diff --git a/integrations/ollama/tests/test_text_embedder.py b/integrations/ollama/tests/test_text_embedder.py index 9d3321e64..f4d45afec 100644 --- a/integrations/ollama/tests/test_text_embedder.py +++ b/integrations/ollama/tests/test_text_embedder.py @@ -10,7 +10,7 @@ def test_init_defaults(self): assert embedder.timeout == 120 assert embedder.generation_kwargs == {} assert embedder.url == "http://localhost:11434/api/embeddings" - assert embedder.model == "orca-mini" + assert embedder.model == "nomic-embed-text" def test_init(self): embedder = OllamaTextEmbedder( @@ -34,10 +34,10 @@ def test_model_not_found(self): @pytest.mark.integration def test_run(self): - embedder = OllamaTextEmbedder(model="orca-mini") + embedder = OllamaTextEmbedder(model="nomic-embed-text") reply = embedder.run("hello") assert isinstance(reply, dict) assert all(isinstance(element, float) for element in reply["embedding"]) - assert reply["meta"]["model"] == "orca-mini" + assert reply["meta"]["model"] == "nomic-embed-text"