From 5f813373eb4a220b542bc9a1e01893e1582843f3 Mon Sep 17 00:00:00 2001
From: Vladimir Blagojevic
Date: Fri, 3 May 2024 10:14:54 +0200
Subject: [PATCH] chore: Update huggingface_hub classes used after library upgrade (#7631)

* Update huggingface_hub classes used after library upgrade

* Fix chat tests

* Update lazy import guard and other references to huggingface_hub>=0.23.0

* In huggingface_hub 0.23.0, the TextGenerationOutput property details is now optional

* More fixes

* Add reno note
---
 .../hugging_face_api_document_embedder.py     |  2 +-
 .../hugging_face_api_text_embedder.py         |  2 +-
 .../hugging_face_tei_document_embedder.py     |  2 +-
 .../hugging_face_tei_text_embedder.py         |  2 +-
 .../generators/chat/hugging_face_api.py       |  2 +-
 .../generators/chat/hugging_face_tgi.py       | 21 +++++++++++------
 .../components/generators/hugging_face_api.py |  6 ++---
 .../components/generators/hugging_face_tgi.py | 17 ++++++++++----
 haystack/utils/hf.py                          |  2 +-
 pyproject.toml                                |  2 +-
 ...gface-hub-dependency-9b8a89d50eb88fea.yaml |  4 ++++
 .../generators/chat/test_hugging_face_api.py  | 23 ++++++++++++++-----
 .../generators/chat/test_hugging_face_tgi.py  | 10 ++++++--
 .../generators/test_hugging_face_api.py       | 10 ++++++--
 .../generators/test_hugging_face_tgi.py       | 10 ++++++--
 15 files changed, 81 insertions(+), 34 deletions(-)
 create mode 100644 releasenotes/notes/upgrade-huggingface-hub-dependency-9b8a89d50eb88fea.yaml
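Editor's note: the change driving most of the hunks below is that in `huggingface_hub` 0.23.0 the `details` attribute of `TextGenerationOutput` is typed as optional and can be `None`. A minimal sketch of the guard pattern this patch applies, assuming `huggingface_hub>=0.23.0` and a reachable text-generation endpoint; the model name and prompt are illustrative:

```python
# Minimal sketch, assuming huggingface_hub>=0.23.0 and a reachable
# text-generation endpoint; model name and prompt are illustrative.
from huggingface_hub import InferenceClient

client = InferenceClient(model="mistralai/Mistral-7B-v0.1")
tgr = client.text_generation("What is NLP?", details=True, max_new_tokens=20)

# Since 0.23.0, `tgr.details` may be None, so guard every access to it.
if tgr.details:
    finish_reason = tgr.details.finish_reason
    completion_tokens = len(tgr.details.tokens)
else:
    finish_reason = None
    completion_tokens = 0
print(tgr.generated_text, finish_reason, completion_tokens)
```

This is the same `finish_reason`/`completion_tokens` bookkeeping the generators below record in their `meta` output.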
install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import: from huggingface_hub import InferenceClient logger = logging.getLogger(__name__) diff --git a/haystack/components/embedders/hugging_face_tei_text_embedder.py b/haystack/components/embedders/hugging_face_tei_text_embedder.py index 5956ee1d83..477003f305 100644 --- a/haystack/components/embedders/hugging_face_tei_text_embedder.py +++ b/haystack/components/embedders/hugging_face_tei_text_embedder.py @@ -8,7 +8,7 @@ from haystack.utils import Secret, deserialize_secrets_inplace from haystack.utils.hf import HFModelType, check_valid_model -with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\"'") as huggingface_hub_import: +with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import: from huggingface_hub import InferenceClient logger = logging.getLogger(__name__) diff --git a/haystack/components/generators/chat/hugging_face_api.py b/haystack/components/generators/chat/hugging_face_api.py index eac3877aca..0deea74ab4 100644 --- a/haystack/components/generators/chat/hugging_face_api.py +++ b/haystack/components/generators/chat/hugging_face_api.py @@ -7,7 +7,7 @@ from haystack.utils.hf import HFGenerationAPIType, HFModelType, check_valid_model from haystack.utils.url_validation import is_valid_http_url -with LazyImport(message="Run 'pip install \"huggingface_hub[inference]>=0.22.0\"'") as huggingface_hub_import: +with LazyImport(message="Run 'pip install \"huggingface_hub[inference]>=0.23.0\"'") as huggingface_hub_import: from huggingface_hub import ChatCompletionOutput, ChatCompletionStreamOutput, InferenceClient diff --git a/haystack/components/generators/chat/hugging_face_tgi.py b/haystack/components/generators/chat/hugging_face_tgi.py index 0956468639..704aaec9a0 100644 --- a/haystack/components/generators/chat/hugging_face_tgi.py +++ b/haystack/components/generators/chat/hugging_face_tgi.py @@ -9,7 +9,7 @@ from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable from haystack.utils.hf import HFModelType, check_generation_params, check_valid_model, list_inference_deployed_models -with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\" transformers'") as transformers_import: +with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\" transformers'") as transformers_import: from huggingface_hub import ( InferenceClient, TextGenerationOutput, @@ -275,13 +275,13 @@ def _run_streaming( message = ChatMessage.from_assistant(chunk.generated_text) message.meta.update( { - "finish_reason": chunk.details.finish_reason, + "finish_reason": chunk.details.finish_reason if chunk.details else None, "index": 0, "model": self.client.model, "usage": { - "completion_tokens": chunk.details.generated_tokens, + "completion_tokens": chunk.details.generated_tokens if chunk.details else 0, "prompt_tokens": prompt_token_count, - "total_tokens": prompt_token_count + chunk.details.generated_tokens, + "total_tokens": prompt_token_count + chunk.details.generated_tokens if chunk.details else 0, }, } ) @@ -294,15 +294,22 @@ def _run_non_streaming( for _i in range(num_responses): tgr: TextGenerationOutput = self.client.text_generation(prepared_prompt, details=True, **generation_kwargs) message = ChatMessage.from_assistant(tgr.generated_text) + if tgr.details: + completion_tokens = len(tgr.details.tokens) + prompt_token_count = prompt_token_count + completion_tokens + finish_reason = tgr.details.finish_reason + else: + finish_reason = None 
diff --git a/haystack/components/generators/hugging_face_api.py b/haystack/components/generators/hugging_face_api.py
index a6d34431c3..b0dd1aafb4 100644
--- a/haystack/components/generators/hugging_face_api.py
+++ b/haystack/components/generators/hugging_face_api.py
@@ -8,7 +8,7 @@
 from haystack.utils.hf import HFGenerationAPIType, HFModelType, check_valid_model
 from haystack.utils.url_validation import is_valid_http_url
 
-with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\"'") as huggingface_hub_import:
+with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import:
     from huggingface_hub import (
         InferenceClient,
         TextGenerationOutput,
@@ -208,8 +208,8 @@ def _run_non_streaming(self, prompt: str, generation_kwargs: Dict[str, Any]):
         meta = [
             {
                 "model": self._client.model,
-                "finish_reason": tgr.details.finish_reason,
-                "usage": {"completion_tokens": len(tgr.details.tokens)},
+                "finish_reason": tgr.details.finish_reason if tgr.details else None,
+                "usage": {"completion_tokens": len(tgr.details.tokens) if tgr.details else 0},
             }
         ]
         return {"replies": [tgr.generated_text], "meta": meta}
diff --git a/haystack/components/generators/hugging_face_tgi.py b/haystack/components/generators/hugging_face_tgi.py
index 1014a42999..24f5101785 100644
--- a/haystack/components/generators/hugging_face_tgi.py
+++ b/haystack/components/generators/hugging_face_tgi.py
@@ -9,7 +9,7 @@
 from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable
 from haystack.utils.hf import HFModelType, check_generation_params, check_valid_model, list_inference_deployed_models
 
-with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\" transformers'") as transformers_import:
+with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\" transformers'") as transformers_import:
     from huggingface_hub import (
         InferenceClient,
         TextGenerationOutput,
@@ -57,7 +57,7 @@ class HuggingFaceTGIGenerator:
     client = HuggingFaceTGIGenerator(model="mistralai/Mistral-7B-v0.1", token=Secret.from_token("<your-api-key>"))
     client.warm_up()
 
-    response = client.run("What's Natural Language Processing?", max_new_tokens=120)
+    response = client.run("What's Natural Language Processing?", generation_kwargs={"max_new_tokens": 120})
     print(response)
     ```
@@ -255,15 +255,22 @@ def _run_non_streaming(
         all_metadata: List[Dict[str, Any]] = []
         for _i in range(num_responses):
             tgr: TextGenerationOutput = self.client.text_generation(prompt, details=True, **generation_kwargs)
+            if tgr.details:
+                completion_tokens = len(tgr.details.tokens)
+                finish_reason = tgr.details.finish_reason
+            else:
+                finish_reason = None
+                completion_tokens = 0
+            total_tokens = prompt_token_count + completion_tokens
             all_metadata.append(
                 {
                     "model": self.client.model,
                     "index": _i,
-                    "finish_reason": tgr.details.finish_reason,
+                    "finish_reason": finish_reason,
                     "usage": {
-                        "completion_tokens": len(tgr.details.tokens),
+                        "completion_tokens": completion_tokens,
                         "prompt_tokens": prompt_token_count,
-                        "total_tokens": prompt_token_count + len(tgr.details.tokens),
+                        "total_tokens": total_tokens,
                     },
                 }
             )
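Editor's note: the docstring change above reflects the generator's actual `run` signature, which takes sampling parameters via `generation_kwargs` rather than as loose keyword arguments. A usage sketch mirroring the corrected docstring; the token value is a placeholder and a deployed TGI backend is assumed:

```python
# Sketch of the corrected call style from the docstring above; the token is a
# placeholder and a reachable TGI deployment is assumed.
from haystack.components.generators import HuggingFaceTGIGenerator
from haystack.utils import Secret

client = HuggingFaceTGIGenerator(model="mistralai/Mistral-7B-v0.1", token=Secret.from_token("<your-api-key>"))
client.warm_up()

response = client.run("What's Natural Language Processing?", generation_kwargs={"max_new_tokens": 120})
print(response["replies"][0])          # generated text
print(response["meta"][0]["usage"])    # token bookkeeping populated above
```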
"total_tokens": prompt_token_count + completion_tokens, }, } ) diff --git a/haystack/utils/hf.py b/haystack/utils/hf.py index 6e7d539bec..dcd423a103 100644 --- a/haystack/utils/hf.py +++ b/haystack/utils/hf.py @@ -14,7 +14,7 @@ with LazyImport(message="Run 'pip install transformers[torch]'") as torch_import: import torch -with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\"'") as huggingface_hub_import: +with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import: from huggingface_hub import HfApi, InferenceClient, model_info from huggingface_hub.utils import RepositoryNotFoundError diff --git a/pyproject.toml b/pyproject.toml index fe93801c4d..50e0f5a6ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,7 +102,7 @@ format-check = "black --check ." [tool.hatch.envs.test] extra-dependencies = [ "transformers[torch,sentencepiece]==4.38.2", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators... - "huggingface_hub>=0.22.0", # TGI Generators and TEI Embedders + "huggingface_hub>=0.23.0", # TGI Generators and TEI Embedders "spacy>=3.7,<3.8", # NamedEntityExtractor "spacy-curated-transformers>=0.2,<=0.3", # NamedEntityExtractor "en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.7.3/en_core_web_trf-3.7.3-py3-none-any.whl", # NamedEntityExtractor diff --git a/releasenotes/notes/upgrade-huggingface-hub-dependency-9b8a89d50eb88fea.yaml b/releasenotes/notes/upgrade-huggingface-hub-dependency-9b8a89d50eb88fea.yaml new file mode 100644 index 0000000000..4113e2e917 --- /dev/null +++ b/releasenotes/notes/upgrade-huggingface-hub-dependency-9b8a89d50eb88fea.yaml @@ -0,0 +1,4 @@ +--- +upgrade: + - | + Upgraded the required version of `huggingface_hub` to `>=0.23.0` across various modules to ensure compatibility and leverage the latest features. This update includes modifications to error handling for token generation details and introduces adjustments in the chat and text generation interfaces to enhance functionality and developer experience. Users are advised to upgrade their `huggingface_hub` dependency. 
diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py
index df2b33618b..2c9d523c19 100644
--- a/test/components/generators/chat/test_hugging_face_api.py
+++ b/test/components/generators/chat/test_hugging_face_api.py
@@ -4,10 +4,10 @@
 import pytest
 from huggingface_hub import (
     ChatCompletionOutput,
-    ChatCompletionOutputChoice,
-    ChatCompletionOutputChoiceMessage,
     ChatCompletionStreamOutput,
+    ChatCompletionOutputComplete,
     ChatCompletionStreamOutputChoice,
+    ChatCompletionOutputMessage,
     ChatCompletionStreamOutputDelta,
 )
 from huggingface_hub.utils import RepositoryNotFoundError
@@ -33,14 +33,17 @@ def mock_chat_completion():
     with patch("huggingface_hub.InferenceClient.chat_completion", autospec=True) as mock_chat_completion:
         completion = ChatCompletionOutput(
             choices=[
-                ChatCompletionOutputChoice(
+                ChatCompletionOutputComplete(
                     finish_reason="eos_token",
                     index=0,
-                    message=ChatCompletionOutputChoiceMessage(
-                        content="The capital of France is Paris.", role="assistant"
-                    ),
+                    message=ChatCompletionOutputMessage(content="The capital of France is Paris.", role="assistant"),
                 )
             ],
+            id="some_id",
+            model="some_model",
+            object="some_object",
+            system_fingerprint="some_fingerprint",
+            usage={"completion_tokens": 10, "prompt_tokens": 5, "total_tokens": 15},
             created=1710498360,
         )
@@ -208,6 +211,10 @@ def mock_iter(self):
                     finish_reason=None,
                 )
             ],
+            id="some_id",
+            model="some_model",
+            object="some_object",
+            system_fingerprint="some_fingerprint",
             created=1710498504,
         )
@@ -217,6 +224,10 @@
                     delta=ChatCompletionStreamOutputDelta(content=None, role=None), index=0, finish_reason="length"
                 )
             ],
+            id="some_id",
+            model="some_model",
+            object="some_object",
+            system_fingerprint="some_fingerprint",
             created=1710498504,
         )
diff --git a/test/components/generators/chat/test_hugging_face_tgi.py b/test/components/generators/chat/test_hugging_face_tgi.py
index 5f9651d963..cd372bb32f 100644
--- a/test/components/generators/chat/test_hugging_face_tgi.py
+++ b/test/components/generators/chat/test_hugging_face_tgi.py
@@ -1,7 +1,11 @@
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
-from huggingface_hub import TextGenerationOutputToken, TextGenerationStreamDetails, TextGenerationStreamOutput
+from huggingface_hub import (
+    TextGenerationOutputToken,
+    TextGenerationStreamOutput,
+    TextGenerationStreamOutputStreamDetails,
+)
 from huggingface_hub.utils import RepositoryNotFoundError
 
 from haystack.components.generators.chat import HuggingFaceTGIChatGenerator
@@ -329,13 +333,15 @@ def streaming_callback_fn(chunk: StreamingChunk):
         # self needed here, don't remove
         def mock_iter(self):
             yield TextGenerationStreamOutput(
+                index=0,
                 generated_text=None,
                 token=TextGenerationOutputToken(id=1, text="I'm fine, thanks.", logprob=0.0, special=False),
             )
             yield TextGenerationStreamOutput(
+                index=0,
                 generated_text=None,
                 token=TextGenerationOutputToken(id=1, text="Ok bye", logprob=0.0, special=False),
-                details=TextGenerationStreamDetails(finish_reason="length", generated_tokens=5, seed=None),
+                details=TextGenerationStreamOutputStreamDetails(finish_reason="length", generated_tokens=5, seed=None),
             )
 
         mock_response = Mock(**{"__iter__": mock_iter})
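Editor's note: the renamed 0.23.0 dataclasses mocked in these tests can be constructed directly. A standalone sketch using only names and fields that appear in the diffs; the field values are illustrative:

```python
# Constructing the renamed huggingface_hub 0.23.0 stream dataclasses directly;
# values mirror the mocks in the tests above. Note `index` is now required.
from huggingface_hub import (
    TextGenerationOutputToken,
    TextGenerationStreamOutput,
    TextGenerationStreamOutputStreamDetails,
)

chunk = TextGenerationStreamOutput(
    index=0,
    generated_text=None,
    token=TextGenerationOutputToken(id=1, text="Hello", logprob=0.0, special=False),
    details=TextGenerationStreamOutputStreamDetails(finish_reason="length", generated_tokens=1, seed=None),
)
print(chunk.token.text, chunk.details.finish_reason if chunk.details else None)
```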
diff --git a/test/components/generators/test_hugging_face_api.py b/test/components/generators/test_hugging_face_api.py
index 8786e7f536..21bca849b6 100644
--- a/test/components/generators/test_hugging_face_api.py
+++ b/test/components/generators/test_hugging_face_api.py
@@ -2,7 +2,11 @@
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
-from huggingface_hub import TextGenerationOutputToken, TextGenerationStreamDetails, TextGenerationStreamOutput
+from huggingface_hub import (
+    TextGenerationOutputToken,
+    TextGenerationStreamOutput,
+    TextGenerationStreamOutputStreamDetails,
+)
 from huggingface_hub.utils import RepositoryNotFoundError
 
 from haystack.components.generators import HuggingFaceAPIGenerator
@@ -236,13 +240,15 @@ def streaming_callback_fn(chunk: StreamingChunk):
         # Don't remove self
         def mock_iter(self):
             yield TextGenerationStreamOutput(
+                index=0,
                 generated_text=None,
                 token=TextGenerationOutputToken(id=1, text="I'm fine, thanks.", logprob=0.0, special=False),
             )
             yield TextGenerationStreamOutput(
+                index=1,
                 generated_text=None,
                 token=TextGenerationOutputToken(id=1, text="Ok bye", logprob=0.0, special=False),
-                details=TextGenerationStreamDetails(finish_reason="length", generated_tokens=5, seed=None),
+                details=TextGenerationStreamOutputStreamDetails(finish_reason="length", generated_tokens=5, seed=None),
             )
 
         mock_response = Mock(**{"__iter__": mock_iter})
diff --git a/test/components/generators/test_hugging_face_tgi.py b/test/components/generators/test_hugging_face_tgi.py
index 042ebd3077..329f3e0316 100644
--- a/test/components/generators/test_hugging_face_tgi.py
+++ b/test/components/generators/test_hugging_face_tgi.py
@@ -1,7 +1,11 @@
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
-from huggingface_hub import TextGenerationOutputToken, TextGenerationStreamDetails, TextGenerationStreamOutput
+from huggingface_hub import (
+    TextGenerationOutputToken,
+    TextGenerationStreamOutput,
+    TextGenerationStreamOutputStreamDetails,
+)
 from huggingface_hub.utils import RepositoryNotFoundError
 
 from haystack.components.generators import HuggingFaceTGIGenerator
@@ -271,13 +275,15 @@ def streaming_callback_fn(chunk: StreamingChunk):
         # Don't remove self
        def mock_iter(self):
             yield TextGenerationStreamOutput(
+                index=0,
                 generated_text=None,
                 token=TextGenerationOutputToken(id=1, text="I'm fine, thanks.", logprob=0.0, special=False),
             )
             yield TextGenerationStreamOutput(
+                index=1,
                 generated_text=None,
                 token=TextGenerationOutputToken(id=1, text="Ok bye", logprob=0.0, special=False),
-                details=TextGenerationStreamDetails(finish_reason="length", generated_tokens=5, seed=None),
+                details=TextGenerationStreamOutputStreamDetails(finish_reason="length", generated_tokens=5, seed=None),
             )
 
         mock_response = Mock(**{"__iter__": mock_iter})