Skip to content

Commit

Permalink
Changed Default Ollama Embedding models to supported model: nomic-embed-text (#490)
Browse files Browse the repository at this point in the history

* Changed Embedding model to supported model: nomic-embed-text

* Updated workflow yml with support for llm and embedding model
  • Loading branch information
jmdevita authored Feb 28, 2024
1 parent 9361465 commit 1735977
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 14 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/ollama.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ env:
PYTHONUNBUFFERED: "1"
FORCE_COLOR: "1"
LLM_FOR_TESTS: "orca-mini"
EMBEDDER_FOR_TESTS: "nomic-embed-text"

jobs:
run:
Expand Down Expand Up @@ -55,7 +56,10 @@ jobs:
run: hatch run lint:all

- name: Pull the LLM in the Ollama service
run: docker exec ollama ollama pull ${{ env.LLM_FOR_TESTS }}
run: docker exec ollama ollama pull ${{ env.LLM_FOR_TESTS }}

- name: Pull the Embedding Model in the Ollama service
run: docker exec ollama ollama pull ${{ env.EMBEDDER_FOR_TESTS }}

- name: Generate docs
if: matrix.python-version == '3.9' && runner.os == 'Linux'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
class OllamaDocumentEmbedder:
def __init__(
self,
model: str = "orca-mini",
model: str = "nomic-embed-text",
url: str = "http://localhost:11434/api/embeddings",
generation_kwargs: Optional[Dict[str, Any]] = None,
timeout: int = 120,
Expand All @@ -21,7 +21,7 @@ def __init__(
):
"""
:param model: The name of the model to use. The model should be available in the running Ollama instance.
Default is "orca-mini".
Default is "nomic-embed-text". "https://ollama.com/library/nomic-embed-text"
:param url: The URL of the chat endpoint of a running Ollama instance.
Default is "http://localhost:11434/api/embeddings".
:param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
class OllamaTextEmbedder:
def __init__(
self,
model: str = "orca-mini",
model: str = "nomic-embed-text",
url: str = "http://localhost:11434/api/embeddings",
generation_kwargs: Optional[Dict[str, Any]] = None,
timeout: int = 120,
):
"""
:param model: The name of the model to use. The model should be available in the running Ollama instance.
Default is "orca-mini".
Default is "nomic-embed-text". "https://ollama.com/library/nomic-embed-text"
:param url: The URL of the chat endpoint of a running Ollama instance.
Default is "http://localhost:11434/api/embeddings".
:param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature,
Expand Down
12 changes: 6 additions & 6 deletions integrations/ollama/tests/test_document_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ def test_init_defaults(self):
assert embedder.timeout == 120
assert embedder.generation_kwargs == {}
assert embedder.url == "http://localhost:11434/api/embeddings"
assert embedder.model == "orca-mini"
assert embedder.model == "nomic-embed-text"

def test_init(self):
embedder = OllamaDocumentEmbedder(
model="orca-mini",
model="nomic-embed-text",
url="http://my-custom-endpoint:11434/api/embeddings",
generation_kwargs={"temperature": 0.5},
timeout=3000,
Expand All @@ -24,7 +24,7 @@ def test_init(self):
assert embedder.timeout == 3000
assert embedder.generation_kwargs == {"temperature": 0.5}
assert embedder.url == "http://my-custom-endpoint:11434/api/embeddings"
assert embedder.model == "orca-mini"
assert embedder.model == "nomic-embed-text"

@pytest.mark.integration
def test_model_not_found(self):
Expand All @@ -35,17 +35,17 @@ def test_model_not_found(self):

@pytest.mark.integration
def import_text_in_embedder(self):
embedder = OllamaDocumentEmbedder(model="orca-mini")
embedder = OllamaDocumentEmbedder(model="nomic-embed-text")

with pytest.raises(TypeError):
embedder.run("This is a text string. This should not work.")

@pytest.mark.integration
def test_run(self):
embedder = OllamaDocumentEmbedder(model="orca-mini")
embedder = OllamaDocumentEmbedder(model="nomic-embed-text")
list_of_docs = [Document(content="This is a document containing some text.")]
reply = embedder.run(list_of_docs)

assert isinstance(reply, dict)
assert all(isinstance(element, float) for element in reply["documents"][0].embedding)
assert reply["meta"]["model"] == "orca-mini"
assert reply["meta"]["model"] == "nomic-embed-text"
6 changes: 3 additions & 3 deletions integrations/ollama/tests/test_text_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_init_defaults(self):
assert embedder.timeout == 120
assert embedder.generation_kwargs == {}
assert embedder.url == "http://localhost:11434/api/embeddings"
assert embedder.model == "orca-mini"
assert embedder.model == "nomic-embed-text"

def test_init(self):
embedder = OllamaTextEmbedder(
Expand All @@ -34,10 +34,10 @@ def test_model_not_found(self):

@pytest.mark.integration
def test_run(self):
embedder = OllamaTextEmbedder(model="orca-mini")
embedder = OllamaTextEmbedder(model="nomic-embed-text")

reply = embedder.run("hello")

assert isinstance(reply, dict)
assert all(isinstance(element, float) for element in reply["embedding"])
assert reply["meta"]["model"] == "orca-mini"
assert reply["meta"]["model"] == "nomic-embed-text"

0 comments on commit 1735977

Please sign in to comment.