From 5f813373eb4a220b542bc9a1e01893e1582843f3 Mon Sep 17 00:00:00 2001
From: Vladimir Blagojevic
Date: Fri, 3 May 2024 10:14:54 +0200
Subject: [PATCH] chore: Update huggingface_hub classes used after library upgrade (#7631)

* Update huggingface_hub classes used after library upgrade

* Fix chat tests

* Update lazy import guard and other references to huggingface_hub>=0.23.0

* In huggingface_hub 0.23.0, the TextGenerationOutput property details is now optional

* More fixes

* Add reno note
---
 .../hugging_face_api_document_embedder.py     |  2 +-
 .../hugging_face_api_text_embedder.py         |  2 +-
 .../hugging_face_tei_document_embedder.py     |  2 +-
 .../hugging_face_tei_text_embedder.py         |  2 +-
 .../generators/chat/hugging_face_api.py       |  2 +-
 .../generators/chat/hugging_face_tgi.py       | 21 +++++++++++------
 .../components/generators/hugging_face_api.py |  6 ++---
 .../components/generators/hugging_face_tgi.py | 17 ++++++++++----
 haystack/utils/hf.py                          |  2 +-
 pyproject.toml                                |  2 +-
 ...gface-hub-dependency-9b8a89d50eb88fea.yaml |  4 ++++
 .../generators/chat/test_hugging_face_api.py  | 23 ++++++++++++++-----
 .../generators/chat/test_hugging_face_tgi.py  | 10 ++++++--
 .../generators/test_hugging_face_api.py       | 10 ++++++--
 .../generators/test_hugging_face_tgi.py       | 10 ++++++--
 15 files changed, 81 insertions(+), 34 deletions(-)
 create mode 100644 releasenotes/notes/upgrade-huggingface-hub-dependency-9b8a89d50eb88fea.yaml
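Editor's note: the change driving most of the hunks below is that in `huggingface_hub` 0.23.0 the `details` attribute of `TextGenerationOutput` is typed as optional and can be `None`. A minimal sketch of the guard pattern this patch applies, assuming `huggingface_hub>=0.23.0` and a reachable text-generation endpoint; the model name and prompt are illustrative:

```python
# Minimal sketch, assuming huggingface_hub>=0.23.0 and a reachable
# text-generation endpoint; model name and prompt are illustrative.
from huggingface_hub import InferenceClient

client = InferenceClient(model="mistralai/Mistral-7B-v0.1")
tgr = client.text_generation("What is NLP?", details=True, max_new_tokens=20)

# Since 0.23.0, `tgr.details` may be None, so guard every access to it.
if tgr.details:
    finish_reason = tgr.details.finish_reason
    completion_tokens = len(tgr.details.tokens)
else:
    finish_reason = None
    completion_tokens = 0
print(tgr.generated_text, finish_reason, completion_tokens)
```

This is the same `finish_reason`/`completion_tokens` bookkeeping the generators below record in their `meta` output.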
install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import: from huggingface_hub import InferenceClient logger = logging.getLogger(__name__) diff --git a/haystack/components/embedders/hugging_face_tei_text_embedder.py b/haystack/components/embedders/hugging_face_tei_text_embedder.py index 5956ee1d83..477003f305 100644 --- a/haystack/components/embedders/hugging_face_tei_text_embedder.py +++ b/haystack/components/embedders/hugging_face_tei_text_embedder.py @@ -8,7 +8,7 @@ from haystack.utils import Secret, deserialize_secrets_inplace from haystack.utils.hf import HFModelType, check_valid_model -with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\"'") as huggingface_hub_import: +with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import: from huggingface_hub import InferenceClient logger = logging.getLogger(__name__) diff --git a/haystack/components/generators/chat/hugging_face_api.py b/haystack/components/generators/chat/hugging_face_api.py index eac3877aca..0deea74ab4 100644 --- a/haystack/components/generators/chat/hugging_face_api.py +++ b/haystack/components/generators/chat/hugging_face_api.py @@ -7,7 +7,7 @@ from haystack.utils.hf import HFGenerationAPIType, HFModelType, check_valid_model from haystack.utils.url_validation import is_valid_http_url -with LazyImport(message="Run 'pip install \"huggingface_hub[inference]>=0.22.0\"'") as huggingface_hub_import: +with LazyImport(message="Run 'pip install \"huggingface_hub[inference]>=0.23.0\"'") as huggingface_hub_import: from huggingface_hub import ChatCompletionOutput, ChatCompletionStreamOutput, InferenceClient diff --git a/haystack/components/generators/chat/hugging_face_tgi.py b/haystack/components/generators/chat/hugging_face_tgi.py index 0956468639..704aaec9a0 100644 --- a/haystack/components/generators/chat/hugging_face_tgi.py +++ b/haystack/components/generators/chat/hugging_face_tgi.py @@ -9,7 +9,7 @@ from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable from haystack.utils.hf import HFModelType, check_generation_params, check_valid_model, list_inference_deployed_models -with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\" transformers'") as transformers_import: +with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\" transformers'") as transformers_import: from huggingface_hub import ( InferenceClient, TextGenerationOutput, @@ -275,13 +275,13 @@ def _run_streaming( message = ChatMessage.from_assistant(chunk.generated_text) message.meta.update( { - "finish_reason": chunk.details.finish_reason, + "finish_reason": chunk.details.finish_reason if chunk.details else None, "index": 0, "model": self.client.model, "usage": { - "completion_tokens": chunk.details.generated_tokens, + "completion_tokens": chunk.details.generated_tokens if chunk.details else 0, "prompt_tokens": prompt_token_count, - "total_tokens": prompt_token_count + chunk.details.generated_tokens, + "total_tokens": prompt_token_count + chunk.details.generated_tokens if chunk.details else 0, }, } ) @@ -294,15 +294,22 @@ def _run_non_streaming( for _i in range(num_responses): tgr: TextGenerationOutput = self.client.text_generation(prepared_prompt, details=True, **generation_kwargs) message = ChatMessage.from_assistant(tgr.generated_text) + if tgr.details: + completion_tokens = len(tgr.details.tokens) + prompt_token_count = prompt_token_count + completion_tokens + finish_reason = tgr.details.finish_reason + else: + finish_reason = None 
diff --git a/haystack/components/generators/hugging_face_api.py b/haystack/components/generators/hugging_face_api.py
index a6d34431c3..b0dd1aafb4 100644
--- a/haystack/components/generators/hugging_face_api.py
+++ b/haystack/components/generators/hugging_face_api.py
@@ -8,7 +8,7 @@
 from haystack.utils.hf import HFGenerationAPIType, HFModelType, check_valid_model
 from haystack.utils.url_validation import is_valid_http_url
 
-with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\"'") as huggingface_hub_import:
+with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import:
     from huggingface_hub import (
         InferenceClient,
         TextGenerationOutput,
@@ -208,8 +208,8 @@ def _run_non_streaming(self, prompt: str, generation_kwargs: Dict[str, Any]):
         meta = [
             {
                 "model": self._client.model,
-                "finish_reason": tgr.details.finish_reason,
-                "usage": {"completion_tokens": len(tgr.details.tokens)},
+                "finish_reason": tgr.details.finish_reason if tgr.details else None,
+                "usage": {"completion_tokens": len(tgr.details.tokens) if tgr.details else 0},
             }
         ]
         return {"replies": [tgr.generated_text], "meta": meta}
diff --git a/haystack/components/generators/hugging_face_tgi.py b/haystack/components/generators/hugging_face_tgi.py
index 1014a42999..24f5101785 100644
--- a/haystack/components/generators/hugging_face_tgi.py
+++ b/haystack/components/generators/hugging_face_tgi.py
@@ -9,7 +9,7 @@
 from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable
 from haystack.utils.hf import HFModelType, check_generation_params, check_valid_model, list_inference_deployed_models
 
-with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\" transformers'") as transformers_import:
+with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\" transformers'") as transformers_import:
     from huggingface_hub import (
         InferenceClient,
         TextGenerationOutput,
@@ -57,7 +57,7 @@ class HuggingFaceTGIGenerator:
     client = HuggingFaceTGIGenerator(model="mistralai/Mistral-7B-v0.1", token=Secret.from_token("<your-api-key>"))
     client.warm_up()
 
-    response = client.run("What's Natural Language Processing?", max_new_tokens=120)
+    response = client.run("What's Natural Language Processing?", generation_kwargs={"max_new_tokens": 120})
     print(response)
     ```
@@ -255,15 +255,22 @@ def _run_non_streaming(
         all_metadata: List[Dict[str, Any]] = []
         for _i in range(num_responses):
             tgr: TextGenerationOutput = self.client.text_generation(prompt, details=True, **generation_kwargs)
+            if tgr.details:
+                completion_tokens = len(tgr.details.tokens)
+                finish_reason = tgr.details.finish_reason
+            else:
+                finish_reason = None
+                completion_tokens = 0
+            total_tokens = prompt_token_count + completion_tokens
             all_metadata.append(
                 {
                     "model": self.client.model,
                     "index": _i,
-                    "finish_reason": tgr.details.finish_reason,
+                    "finish_reason": finish_reason,
                     "usage": {
-                        "completion_tokens": len(tgr.details.tokens),
+                        "completion_tokens": completion_tokens,
                         "prompt_tokens": prompt_token_count,
-                        "total_tokens": prompt_token_count + len(tgr.details.tokens),
+                        "total_tokens": total_tokens,
                     },
                 }
             )
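Editor's note: the docstring change above reflects the generator's actual `run` signature, which takes sampling parameters via `generation_kwargs` rather than as loose keyword arguments. A usage sketch mirroring the corrected docstring; the token value is a placeholder and a deployed TGI backend is assumed:

```python
# Sketch of the corrected call style from the docstring above; the token is a
# placeholder and a reachable TGI deployment is assumed.
from haystack.components.generators import HuggingFaceTGIGenerator
from haystack.utils import Secret

client = HuggingFaceTGIGenerator(model="mistralai/Mistral-7B-v0.1", token=Secret.from_token("<your-api-key>"))
client.warm_up()

response = client.run("What's Natural Language Processing?", generation_kwargs={"max_new_tokens": 120})
print(response["replies"][0])          # generated text
print(response["meta"][0]["usage"])    # token bookkeeping populated above
```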
"total_tokens": prompt_token_count + completion_tokens, }, } ) diff --git a/haystack/utils/hf.py b/haystack/utils/hf.py index 6e7d539bec..dcd423a103 100644 --- a/haystack/utils/hf.py +++ b/haystack/utils/hf.py @@ -14,7 +14,7 @@ with LazyImport(message="Run 'pip install transformers[torch]'") as torch_import: import torch -with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\"'") as huggingface_hub_import: +with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import: from huggingface_hub import HfApi, InferenceClient, model_info from huggingface_hub.utils import RepositoryNotFoundError diff --git a/pyproject.toml b/pyproject.toml index fe93801c4d..50e0f5a6ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,7 +102,7 @@ format-check = "black --check ." [tool.hatch.envs.test] extra-dependencies = [ "transformers[torch,sentencepiece]==4.38.2", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators... - "huggingface_hub>=0.22.0", # TGI Generators and TEI Embedders + "huggingface_hub>=0.23.0", # TGI Generators and TEI Embedders "spacy>=3.7,<3.8", # NamedEntityExtractor "spacy-curated-transformers>=0.2,<=0.3", # NamedEntityExtractor "en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.7.3/en_core_web_trf-3.7.3-py3-none-any.whl", # NamedEntityExtractor diff --git a/releasenotes/notes/upgrade-huggingface-hub-dependency-9b8a89d50eb88fea.yaml b/releasenotes/notes/upgrade-huggingface-hub-dependency-9b8a89d50eb88fea.yaml new file mode 100644 index 0000000000..4113e2e917 --- /dev/null +++ b/releasenotes/notes/upgrade-huggingface-hub-dependency-9b8a89d50eb88fea.yaml @@ -0,0 +1,4 @@ +--- +upgrade: + - | + Upgraded the required version of `huggingface_hub` to `>=0.23.0` across various modules to ensure compatibility and leverage the latest features. This update includes modifications to error handling for token generation details and introduces adjustments in the chat and text generation interfaces to enhance functionality and developer experience. Users are advised to upgrade their `huggingface_hub` dependency. 
diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py
index df2b33618b..2c9d523c19 100644
--- a/test/components/generators/chat/test_hugging_face_api.py
+++ b/test/components/generators/chat/test_hugging_face_api.py
@@ -4,10 +4,10 @@
 import pytest
 from huggingface_hub import (
     ChatCompletionOutput,
-    ChatCompletionOutputChoice,
-    ChatCompletionOutputChoiceMessage,
     ChatCompletionStreamOutput,
+    ChatCompletionOutputComplete,
     ChatCompletionStreamOutputChoice,
+    ChatCompletionOutputMessage,
     ChatCompletionStreamOutputDelta,
 )
 from huggingface_hub.utils import RepositoryNotFoundError
@@ -33,14 +33,17 @@ def mock_chat_completion():
     with patch("huggingface_hub.InferenceClient.chat_completion", autospec=True) as mock_chat_completion:
         completion = ChatCompletionOutput(
             choices=[
-                ChatCompletionOutputChoice(
+                ChatCompletionOutputComplete(
                     finish_reason="eos_token",
                     index=0,
-                    message=ChatCompletionOutputChoiceMessage(
-                        content="The capital of France is Paris.", role="assistant"
-                    ),
+                    message=ChatCompletionOutputMessage(content="The capital of France is Paris.", role="assistant"),
                 )
             ],
+            id="some_id",
+            model="some_model",
+            object="some_object",
+            system_fingerprint="some_fingerprint",
+            usage={"completion_tokens": 10, "prompt_tokens": 5, "total_tokens": 15},
             created=1710498360,
         )
@@ -208,6 +211,10 @@ def mock_iter(self):
                     finish_reason=None,
                 )
             ],
+            id="some_id",
+            model="some_model",
+            object="some_object",
+            system_fingerprint="some_fingerprint",
             created=1710498504,
         )
@@ -217,6 +224,10 @@
                     delta=ChatCompletionStreamOutputDelta(content=None, role=None), index=0, finish_reason="length"
                 )
             ],
+            id="some_id",
+            model="some_model",
+            object="some_object",
+            system_fingerprint="some_fingerprint",
             created=1710498504,
         )
diff --git a/test/components/generators/chat/test_hugging_face_tgi.py b/test/components/generators/chat/test_hugging_face_tgi.py
index 5f9651d963..cd372bb32f 100644
--- a/test/components/generators/chat/test_hugging_face_tgi.py
+++ b/test/components/generators/chat/test_hugging_face_tgi.py
@@ -1,7 +1,11 @@
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
-from huggingface_hub import TextGenerationOutputToken, TextGenerationStreamDetails, TextGenerationStreamOutput
+from huggingface_hub import (
+    TextGenerationOutputToken,
+    TextGenerationStreamOutput,
+    TextGenerationStreamOutputStreamDetails,
+)
 from huggingface_hub.utils import RepositoryNotFoundError
 
 from haystack.components.generators.chat import HuggingFaceTGIChatGenerator
@@ -329,13 +333,15 @@ def streaming_callback_fn(chunk: StreamingChunk):
         # self needed here, don't remove
         def mock_iter(self):
             yield TextGenerationStreamOutput(
+                index=0,
                 generated_text=None,
                 token=TextGenerationOutputToken(id=1, text="I'm fine, thanks.", logprob=0.0, special=False),
             )
             yield TextGenerationStreamOutput(
+                index=0,
                 generated_text=None,
                 token=TextGenerationOutputToken(id=1, text="Ok bye", logprob=0.0, special=False),
-                details=TextGenerationStreamDetails(finish_reason="length", generated_tokens=5, seed=None),
+                details=TextGenerationStreamOutputStreamDetails(finish_reason="length", generated_tokens=5, seed=None),
             )
 
         mock_response = Mock(**{"__iter__": mock_iter})
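Editor's note: the renamed 0.23.0 dataclasses mocked in these tests can be constructed directly. A standalone sketch using only names and fields that appear in the diffs; the field values are illustrative:

```python
# Constructing the renamed huggingface_hub 0.23.0 stream dataclasses directly;
# values mirror the mocks in the tests above. Note `index` is now required.
from huggingface_hub import (
    TextGenerationOutputToken,
    TextGenerationStreamOutput,
    TextGenerationStreamOutputStreamDetails,
)

chunk = TextGenerationStreamOutput(
    index=0,
    generated_text=None,
    token=TextGenerationOutputToken(id=1, text="Hello", logprob=0.0, special=False),
    details=TextGenerationStreamOutputStreamDetails(finish_reason="length", generated_tokens=1, seed=None),
)
print(chunk.token.text, chunk.details.finish_reason if chunk.details else None)
```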
diff --git a/test/components/generators/test_hugging_face_api.py b/test/components/generators/test_hugging_face_api.py
index 8786e7f536..21bca849b6 100644
--- a/test/components/generators/test_hugging_face_api.py
+++ b/test/components/generators/test_hugging_face_api.py
@@ -2,7 +2,11 @@
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
-from huggingface_hub import TextGenerationOutputToken, TextGenerationStreamDetails, TextGenerationStreamOutput
+from huggingface_hub import (
+    TextGenerationOutputToken,
+    TextGenerationStreamOutput,
+    TextGenerationStreamOutputStreamDetails,
+)
 from huggingface_hub.utils import RepositoryNotFoundError
 
 from haystack.components.generators import HuggingFaceAPIGenerator
@@ -236,13 +240,15 @@ def streaming_callback_fn(chunk: StreamingChunk):
         # Don't remove self
         def mock_iter(self):
             yield TextGenerationStreamOutput(
+                index=0,
                 generated_text=None,
                 token=TextGenerationOutputToken(id=1, text="I'm fine, thanks.", logprob=0.0, special=False),
             )
             yield TextGenerationStreamOutput(
+                index=1,
                 generated_text=None,
                 token=TextGenerationOutputToken(id=1, text="Ok bye", logprob=0.0, special=False),
-                details=TextGenerationStreamDetails(finish_reason="length", generated_tokens=5, seed=None),
+                details=TextGenerationStreamOutputStreamDetails(finish_reason="length", generated_tokens=5, seed=None),
             )
 
         mock_response = Mock(**{"__iter__": mock_iter})
diff --git a/test/components/generators/test_hugging_face_tgi.py b/test/components/generators/test_hugging_face_tgi.py
index 042ebd3077..329f3e0316 100644
--- a/test/components/generators/test_hugging_face_tgi.py
+++ b/test/components/generators/test_hugging_face_tgi.py
@@ -1,7 +1,11 @@
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
-from huggingface_hub import TextGenerationOutputToken, TextGenerationStreamDetails, TextGenerationStreamOutput
+from huggingface_hub import (
+    TextGenerationOutputToken,
+    TextGenerationStreamOutput,
+    TextGenerationStreamOutputStreamDetails,
+)
 from huggingface_hub.utils import RepositoryNotFoundError
 
 from haystack.components.generators import HuggingFaceTGIGenerator
@@ -271,13 +275,15 @@ def streaming_callback_fn(chunk: StreamingChunk):
         # Don't remove self
        def mock_iter(self):
             yield TextGenerationStreamOutput(
+                index=0,
                 generated_text=None,
                 token=TextGenerationOutputToken(id=1, text="I'm fine, thanks.", logprob=0.0, special=False),
             )
             yield TextGenerationStreamOutput(
+                index=1,
                 generated_text=None,
                 token=TextGenerationOutputToken(id=1, text="Ok bye", logprob=0.0, special=False),
-                details=TextGenerationStreamDetails(finish_reason="length", generated_tokens=5, seed=None),
+                details=TextGenerationStreamOutputStreamDetails(finish_reason="length", generated_tokens=5, seed=None),
             )
 
         mock_response = Mock(**{"__iter__": mock_iter})