Skip to content

Commit

Permalink
fix wheel
Browse files Browse the repository at this point in the history
  • Loading branch information
masci committed Dec 12, 2023
1 parent 0982928 commit 2f1b737
Show file tree
Hide file tree
Showing 12 changed files with 77 additions and 68 deletions.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# SPDX-FileCopyrightText: 2023-present deepset GmbH <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0
from instructor_embedders_haystack.instructor_document_embedder import InstructorDocumentEmbedder
from instructor_embedders_haystack.instructor_text_embedder import InstructorTextEmbedder

__all__ = ["InstructorDocumentEmbedder", "InstructorTextEmbedder"]
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from haystack import Document, component, default_from_dict, default_to_dict

from instructor_embedders.embedding_backend.instructor_backend import _InstructorEmbeddingBackendFactory
from instructor_embedders_haystack.embedding_backend.instructor_backend import _InstructorEmbeddingBackendFactory


@component
Expand All @@ -19,7 +19,7 @@ class InstructorDocumentEmbedder:
# To use this component, install the "instructor-embedders-haystack" package.
# pip install instructor-embedders-haystack
from instructor_embedders.instructor_document_embedder import InstructorDocumentEmbedder
from instructor_embedders_haystack.instructor_document_embedder import InstructorDocumentEmbedder
from haystack.dataclasses import Document
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from haystack import component, default_from_dict, default_to_dict

from instructor_embedders.embedding_backend.instructor_backend import _InstructorEmbeddingBackendFactory
from instructor_embedders_haystack.embedding_backend.instructor_backend import _InstructorEmbeddingBackendFactory


@component
Expand All @@ -18,7 +18,7 @@ class InstructorTextEmbedder:
# To use this component, install the "instructor-embedders-haystack" package.
# pip install instructor-embedders-haystack
from instructor_embedders.instructor_text_embedder import InstructorTextEmbedder
from instructor_embedders_haystack.instructor_text_embedder import InstructorTextEmbedder
text = "It clearly says online this will work on a Mac OS system. The disk comes and it does not, only Windows. Do Not order this if you have a Mac!!"
instruction = (
Expand Down
48 changes: 41 additions & 7 deletions integrations/instructor-embedders/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,22 +50,46 @@ dependencies = [
dev = ["pytest"]

[project.urls]
Documentation = "https://github.com/deepset-ai/haystack-extras/tree/main/components/instructor-embedders#readme"
Issues = "https://github.com/deepset-ai/haystack-extras/issues"
Source = "https://github.com/deepset-ai/haystack-extras/tree/main/components/instructor-embedders"
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/instructor-embedders#readme"
Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues"
Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/instructor-embedders"

[tool.hatch.version]
path = "instructor_embedders/__about__.py"
path = "instructor_embedders_haystack/__about__.py"

[tool.hatch.envs.default]
dependencies = ["pytest", "pytest-cov"]

[tool.hatch.envs.default.scripts]
cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=instructor_embedders_haystack --cov=tests"
no-cov = "cov --no-cov"
test = "pytest {args:tests}"

[[tool.hatch.envs.test.matrix]]
python = ["37", "38", "39", "310", "311"]
python = ["38", "39", "310", "311"]

[tool.hatch.envs.lint]
detached = true
dependencies = [
"black>=23.1.0",
"mypy>=1.0.0",
"ruff>=0.0.243",
]
[tool.hatch.envs.lint.scripts]
typing = "mypy --install-types --non-interactive {args:instructor_embedders_haystack tests}"
style = [
"ruff {args:.}",
"black --check --diff {args:.}",
]
fmt = [
"black {args:.}",
"ruff --fix {args:.}",
"style",
]
all = [
"style",
"typing",
]

[tool.coverage.run]
branch = true
Expand All @@ -76,7 +100,7 @@ omit = ["instructor_embedders/__about__.py"]
exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]

[tool.ruff]
target-version = "py37"
target-version = "py38"
line-length = 120
select = [
"A",
Expand Down Expand Up @@ -132,8 +156,18 @@ ban-relative-imports = "all"
[tool.pytest.ini_options]
minversion = "6.0"
addopts = "--strict-markers"
markers = ["integration: integration tests", "unit: unit tests"]
markers = ["integration: integration tests"]
log_cli = true

[tool.black]
line-length = 120

[[tool.mypy.overrides]]
module = [
"instructor_embedders_haystack.*",
"InstructorEmbedding.*",
"haystack.*",
"pytest.*",
"numpy.*",
]
ignore_missing_imports = true
19 changes: 11 additions & 8 deletions integrations/instructor-embedders/tests/test_instructor_backend.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
from unittest.mock import patch

import pytest

from instructor_embedders.embedding_backend.instructor_backend import _InstructorEmbeddingBackendFactory
from instructor_embedders_haystack.embedding_backend.instructor_backend import _InstructorEmbeddingBackendFactory


@pytest.mark.unit
@patch("instructor_embedders.embedding_backend.instructor_backend.INSTRUCTOR")
@patch("instructor_embedders_haystack.embedding_backend.instructor_backend.INSTRUCTOR")
def test_factory_behavior(mock_instructor): # noqa: ARG001
embedding_backend = _InstructorEmbeddingBackendFactory.get_embedding_backend(
model_name_or_path="hkunlp/instructor-large", device="cpu"
Expand All @@ -19,20 +17,23 @@ def test_factory_behavior(mock_instructor): # noqa: ARG001
assert same_embedding_backend is embedding_backend
assert another_embedding_backend is not embedding_backend

# restore the factory state
_InstructorEmbeddingBackendFactory._instances = {}

@pytest.mark.unit
@patch("instructor_embedders.embedding_backend.instructor_backend.INSTRUCTOR")

@patch("instructor_embedders_haystack.embedding_backend.instructor_backend.INSTRUCTOR")
def test_model_initialization(mock_instructor):
    """
    Check that the factory forwards its arguments to the INSTRUCTOR constructor.

    INSTRUCTOR is patched in the backend module, so no real model is loaded.
    """
    try:
        _InstructorEmbeddingBackendFactory.get_embedding_backend(
            model_name_or_path="hkunlp/instructor-base", device="cpu", use_auth_token="huggingface_auth_token"
        )
        mock_instructor.assert_called_once_with(
            model_name_or_path="hkunlp/instructor-base", device="cpu", use_auth_token="huggingface_auth_token"
        )
    finally:
        # restore the factory state even when an assertion fails, so a backend
        # built around the mock cannot leak into later tests through the cache
        _InstructorEmbeddingBackendFactory._instances = {}


@pytest.mark.unit
@patch("instructor_embedders.embedding_backend.instructor_backend.INSTRUCTOR")
@patch("instructor_embedders_haystack.embedding_backend.instructor_backend.INSTRUCTOR")
def test_embedding_function_with_kwargs(mock_instructor): # noqa: ARG001
embedding_backend = _InstructorEmbeddingBackendFactory.get_embedding_backend(
model_name_or_path="hkunlp/instructor-base"
Expand All @@ -42,3 +43,5 @@ def test_embedding_function_with_kwargs(mock_instructor): # noqa: ARG001
embedding_backend.embed(data=data, normalize_embeddings=True)

embedding_backend.model.encode.assert_called_once_with(data, normalize_embeddings=True)
# restore the factory state
_InstructorEmbeddingBackendFactory._instances = {}
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@
import pytest
from haystack import Document

from instructor_embedders.instructor_document_embedder import InstructorDocumentEmbedder
from instructor_embedders_haystack.instructor_document_embedder import InstructorDocumentEmbedder


class TestInstructorDocumentEmbedder:
@pytest.mark.unit
def test_init_default(self):
"""
Test default initialization parameters for InstructorDocumentEmbedder.
Expand All @@ -24,7 +23,6 @@ def test_init_default(self):
assert embedder.metadata_fields_to_embed == []
assert embedder.embedding_separator == "\n"

@pytest.mark.unit
def test_init_with_parameters(self):
"""
Test custom initialization parameters for InstructorDocumentEmbedder.
Expand All @@ -50,15 +48,14 @@ def test_init_with_parameters(self):
assert embedder.metadata_fields_to_embed == ["test_field"]
assert embedder.embedding_separator == " | "

@pytest.mark.unit
def test_to_dict(self):
"""
Test serialization of InstructorDocumentEmbedder to a dictionary, using default initialization parameters.
"""
embedder = InstructorDocumentEmbedder(model_name_or_path="hkunlp/instructor-base")
embedder_dict = embedder.to_dict()
assert embedder_dict == {
"type": "instructor_embedders.instructor_document_embedder.InstructorDocumentEmbedder",
"type": "instructor_embedders_haystack.instructor_document_embedder.InstructorDocumentEmbedder",
"init_parameters": {
"model_name_or_path": "hkunlp/instructor-base",
"device": "cpu",
Expand All @@ -72,7 +69,6 @@ def test_to_dict(self):
},
}

@pytest.mark.unit
def test_to_dict_with_custom_init_parameters(self):
"""
Test serialization of InstructorDocumentEmbedder to a dictionary, using custom initialization parameters.
Expand All @@ -90,7 +86,7 @@ def test_to_dict_with_custom_init_parameters(self):
)
embedder_dict = embedder.to_dict()
assert embedder_dict == {
"type": "instructor_embedders.instructor_document_embedder.InstructorDocumentEmbedder",
"type": "instructor_embedders_haystack.instructor_document_embedder.InstructorDocumentEmbedder",
"init_parameters": {
"model_name_or_path": "hkunlp/instructor-base",
"device": "cuda",
Expand All @@ -104,13 +100,12 @@ def test_to_dict_with_custom_init_parameters(self):
},
}

@pytest.mark.unit
def test_from_dict(self):
"""
Test deserialization of InstructorDocumentEmbedder from a dictionary, using default initialization parameters.
"""
embedder_dict = {
"type": "instructor_embedders.instructor_document_embedder.InstructorDocumentEmbedder",
"type": "instructor_embedders_haystack.instructor_document_embedder.InstructorDocumentEmbedder",
"init_parameters": {
"model_name_or_path": "hkunlp/instructor-base",
"device": "cpu",
Expand All @@ -134,13 +129,12 @@ def test_from_dict(self):
assert embedder.metadata_fields_to_embed == []
assert embedder.embedding_separator == "\n"

@pytest.mark.unit
def test_from_dict_with_custom_init_parameters(self):
"""
Test deserialization of InstructorDocumentEmbedder from a dictionary, using custom initialization parameters.
"""
embedder_dict = {
"type": "instructor_embedders.instructor_document_embedder.InstructorDocumentEmbedder",
"type": "instructor_embedders_haystack.instructor_document_embedder.InstructorDocumentEmbedder",
"init_parameters": {
"model_name_or_path": "hkunlp/instructor-base",
"device": "cuda",
Expand All @@ -164,8 +158,7 @@ def test_from_dict_with_custom_init_parameters(self):
assert embedder.metadata_fields_to_embed == ["test_field"]
assert embedder.embedding_separator == " | "

@pytest.mark.unit
@patch("instructor_embedders.instructor_document_embedder._InstructorEmbeddingBackendFactory")
@patch("instructor_embedders_haystack.instructor_document_embedder._InstructorEmbeddingBackendFactory")
def test_warmup(self, mocked_factory):
"""
Test for checking embedder instances after warm-up.
Expand All @@ -177,8 +170,7 @@ def test_warmup(self, mocked_factory):
model_name_or_path="hkunlp/instructor-base", device="cpu", use_auth_token=None
)

@pytest.mark.unit
@patch("instructor_embedders.instructor_document_embedder._InstructorEmbeddingBackendFactory")
@patch("instructor_embedders_haystack.instructor_document_embedder._InstructorEmbeddingBackendFactory")
def test_warmup_does_not_reload(self, mocked_factory):
"""
Test for checking backend instances after multiple warm-ups.
Expand All @@ -189,7 +181,6 @@ def test_warmup_does_not_reload(self, mocked_factory):
embedder.warm_up()
mocked_factory.get_embedding_backend.assert_called_once()

@pytest.mark.unit
def test_embed(self):
"""
Test for checking output dimensions and embedding dimensions.
Expand All @@ -209,7 +200,6 @@ def test_embed(self):
assert isinstance(doc.embedding, list)
assert isinstance(doc.embedding[0], float)

@pytest.mark.unit
def test_embed_incorrect_input_format(self):
"""
Test for checking incorrect input format when creating embedding.
Expand All @@ -225,7 +215,6 @@ def test_embed_incorrect_input_format(self):
with pytest.raises(TypeError, match="InstructorDocumentEmbedder expects a list of Documents as input."):
embedder.run(documents=list_integers_input)

@pytest.mark.unit
def test_embed_metadata(self):
"""
Test for checking output dimensions and embedding dimensions for documents
Expand Down

This file was deleted.

Loading

0 comments on commit 2f1b737

Please sign in to comment.