Commit ae0f157

Merge branch 'main' into medsriha-patch-1
medsriha authored May 3, 2024
2 parents ea3e531 + cd66a80 commit ae0f157
Showing 21 changed files with 402 additions and 102 deletions.
28 changes: 24 additions & 4 deletions .github/workflows/minor_version_release.yml
@@ -53,17 +53,37 @@ jobs:
- name: Bump version on ${{ steps.branch.outputs.name }}
shell: bash
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# We use the HAYSTACK_BOT_TOKEN here so the PR created by the step will
# trigger required workflows and can be merged by anyone
GITHUB_TOKEN: ${{ secrets.HAYSTACK_BOT_TOKEN }}
run: |
git checkout "${{ steps.branch.outputs.name }}"
# Tag the base with X.Y.Z-rc0.
# At this point VERSION.txt still contains the previous version and not
# the one specified by the tag.
# This is good though as we just need this to make reno work properly.
NEW_VERSION=$(awk -F. '/[0-9]+\./{$2++;print}' OFS=. < VERSION.txt)
echo "$NEW_VERSION" > VERSION.txt
VERSION_TAG="v$NEW_VERSION"
git tag "$VERSION_TAG" -m"$VERSION_TAG"
git push --tags
# Create the branch that bumps the version in the dev branch
cat VERSION.txt
git checkout -b bump-version
git add .
git commit -m "Update unstable version to $NEW_VERSION"
VERSION_TAG="v$NEW_VERSION"
git tag "$VERSION_TAG" -m"$VERSION_TAG"
git push --atomic origin "${{ steps.branch.outputs.name }}" "$VERSION_TAG"
git push -u origin bump-version
# Create the PR
gh pr create -B "${{ steps.branch.outputs.name }}" \
-H bump-version \
--title "Bump unstable version" \
--body "This PR bumps the unstable version for ${{ inputs.version }}.\n \
The release branch \`v${{ steps.versions.outputs.current_release_minor }}.x\` has been correctly created.\n\
Verify documentation on Readme has been correctly updated before approving and merging this PR." \
--label "ignore-for-release-notes"
- uses: actions/setup-python@v5
with:
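The `awk -F. '/[0-9]+\./{$2++;print}' OFS=.` one-liner in the step above splits the version string on dots and increments the second (minor) field, leaving the patch component and any suffix untouched. A minimal Python sketch of the same logic, for illustration only (the `bump_minor` name is not from the repo):

```python
def bump_minor(version: str) -> str:
    """Increment the minor component of an "X.Y.Z[-suffix]" version string."""
    parts = version.strip().split(".")  # "2.1.0-rc0" -> ["2", "1", "0-rc0"]
    parts[1] = str(int(parts[1]) + 1)   # equivalent of awk's $2++
    return ".".join(parts)              # rejoin with OFS="."

# Matches the VERSION.txt change in this commit:
assert bump_minor("2.1.0-rc0") == "2.2.0-rc0"
```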
2 changes: 1 addition & 1 deletion VERSION.txt
@@ -1 +1 @@
2.1.0-rc0
2.2.0-rc0
8 changes: 4 additions & 4 deletions e2e/pipelines/test_evaluation_pipeline.py
@@ -80,7 +80,7 @@ def evaluation_pipeline():
"""
eval_pipeline = Pipeline()
eval_pipeline.add_component("doc_mrr", DocumentMRREvaluator())
eval_pipeline.add_component("groundness", FaithfulnessEvaluator())
eval_pipeline.add_component("groundedness", FaithfulnessEvaluator())
eval_pipeline.add_component("sas", SASEvaluator(model=EMBEDDINGS_MODEL))
eval_pipeline.add_component("doc_map", DocumentMAPEvaluator())
eval_pipeline.add_component("doc_recall_single_hit", DocumentRecallEvaluator(mode=RecallMode.SINGLE_HIT))
@@ -94,7 +94,7 @@ def built_eval_input(questions, truth_docs, truth_answers, retrieved_docs, conte
"""Helper function to build the input for the evaluation pipeline"""
return {
"doc_mrr": {"ground_truth_documents": truth_docs, "retrieved_documents": retrieved_docs},
"groundness": {"questions": questions, "contexts": contexts, "responses": truth_answers},
"groundedness": {"questions": questions, "contexts": contexts, "predicted_answers": pred_answers},
"sas": {"predicted_answers": pred_answers, "ground_truth_answers": truth_answers},
"doc_map": {"ground_truth_documents": truth_docs, "retrieved_documents": retrieved_docs},
"doc_recall_single_hit": {"ground_truth_documents": truth_docs, "retrieved_documents": retrieved_docs},
@@ -141,8 +141,8 @@ def built_input_for_results_eval(rag_results):
"score": rag_results["sas"]["score"],
},
"Faithfulness": {
"individual_scores": rag_results["groundness"]["individual_scores"],
"score": rag_results["groundness"]["score"],
"individual_scores": rag_results["groundedness"]["individual_scores"],
"score": rag_results["groundedness"]["score"],
},
"Document MAP": {
"individual_scores": rag_results["doc_map"]["individual_scores"],
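These test changes fix the misspelled component name ("groundness" → "groundedness") and switch the `FaithfulnessEvaluator` inputs from `responses` to `predicted_answers`. A minimal standalone sketch of the updated input/output contract, with invented sample strings (the default evaluator backend calls an LLM, so an `OPENAI_API_KEY` is assumed to be set):

```python
from haystack.components.evaluators import FaithfulnessEvaluator

evaluator = FaithfulnessEvaluator()
result = evaluator.run(
    questions=["What is Haystack?"],
    contexts=[["Haystack is an open-source framework for building LLM applications."]],
    predicted_answers=["Haystack is an open-source LLM framework."],
)
# Same keys the e2e test reads above:
print(result["score"], result["individual_scores"])
```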
@@ -10,7 +10,7 @@
from haystack.utils.hf import HFEmbeddingAPIType, HFModelType, check_valid_model
from haystack.utils.url_validation import is_valid_http_url

with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\"'") as huggingface_hub_import:
with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import:
from huggingface_hub import InferenceClient

logger = logging.getLogger(__name__)
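This hunk, like several that follow, only raises the minimum `huggingface_hub` version named in the `LazyImport` guard from 0.22.0 to 0.23.0. For context, a sketch of how that pattern defers the import failure until the dependency is actually used; the `make_client` helper is hypothetical:

```python
from haystack.lazy_imports import LazyImport

with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import:
    from huggingface_hub import InferenceClient  # ImportError is swallowed here

def make_client(model: str):
    # Re-raises the deferred ImportError, with the install hint, on first use.
    huggingface_hub_import.check()
    return InferenceClient(model=model)
```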
@@ -7,7 +7,7 @@
from haystack.utils.hf import HFEmbeddingAPIType, HFModelType, check_valid_model
from haystack.utils.url_validation import is_valid_http_url

with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\"'") as huggingface_hub_import:
with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import:
from huggingface_hub import InferenceClient

logger = logging.getLogger(__name__)
@@ -11,7 +11,7 @@
from haystack.utils import Secret, deserialize_secrets_inplace
from haystack.utils.hf import HFModelType, check_valid_model

with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\"'") as huggingface_hub_import:
with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import:
from huggingface_hub import InferenceClient

logger = logging.getLogger(__name__)
@@ -8,7 +8,7 @@
from haystack.utils import Secret, deserialize_secrets_inplace
from haystack.utils.hf import HFModelType, check_valid_model

with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\"'") as huggingface_hub_import:
with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import:
from huggingface_hub import InferenceClient

logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion haystack/components/generators/chat/hugging_face_api.py
@@ -7,7 +7,7 @@
from haystack.utils.hf import HFGenerationAPIType, HFModelType, check_valid_model
from haystack.utils.url_validation import is_valid_http_url

with LazyImport(message="Run 'pip install \"huggingface_hub[inference]>=0.22.0\"'") as huggingface_hub_import:
with LazyImport(message="Run 'pip install \"huggingface_hub[inference]>=0.23.0\"'") as huggingface_hub_import:
from huggingface_hub import ChatCompletionOutput, ChatCompletionStreamOutput, InferenceClient


21 changes: 14 additions & 7 deletions haystack/components/generators/chat/hugging_face_tgi.py
@@ -9,7 +9,7 @@
from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable
from haystack.utils.hf import HFModelType, check_generation_params, check_valid_model, list_inference_deployed_models

with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\" transformers'") as transformers_import:
with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\" transformers'") as transformers_import:
from huggingface_hub import (
InferenceClient,
TextGenerationOutput,
@@ -275,13 +275,13 @@ def _run_streaming(
message = ChatMessage.from_assistant(chunk.generated_text)
message.meta.update(
{
"finish_reason": chunk.details.finish_reason,
"finish_reason": chunk.details.finish_reason if chunk.details else None,
"index": 0,
"model": self.client.model,
"usage": {
"completion_tokens": chunk.details.generated_tokens,
"completion_tokens": chunk.details.generated_tokens if chunk.details else 0,
"prompt_tokens": prompt_token_count,
"total_tokens": prompt_token_count + chunk.details.generated_tokens,
"total_tokens": prompt_token_count + chunk.details.generated_tokens if chunk.details else 0,
},
}
)
@@ -294,15 +294,22 @@ def _run_non_streaming(
for _i in range(num_responses):
tgr: TextGenerationOutput = self.client.text_generation(prepared_prompt, details=True, **generation_kwargs)
message = ChatMessage.from_assistant(tgr.generated_text)
if tgr.details:
completion_tokens = len(tgr.details.tokens)
prompt_token_count = prompt_token_count + completion_tokens
finish_reason = tgr.details.finish_reason
else:
finish_reason = None
completion_tokens = 0
message.meta.update(
{
"finish_reason": tgr.details.finish_reason,
"finish_reason": finish_reason,
"index": _i,
"model": self.client.model,
"usage": {
"completion_tokens": len(tgr.details.tokens),
"completion_tokens": completion_tokens,
"prompt_tokens": prompt_token_count,
"total_tokens": prompt_token_count + len(tgr.details.tokens),
"total_tokens": prompt_token_count + completion_tokens,
},
}
)
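Both the streaming and non-streaming paths above now tolerate `details` being `None`, which `text_generation` can return even when called with `details=True`. The same guard, distilled into a standalone helper (hypothetical, not part of the commit); the next file applies the identical pattern with inline conditionals:

```python
from typing import Any, Dict, Optional

def usage_meta(details: Optional[Any], prompt_tokens: int, index: int, model: str) -> Dict[str, Any]:
    # Fall back to safe defaults instead of raising AttributeError on
    # details.finish_reason / details.tokens when the backend omits details.
    completion_tokens = len(details.tokens) if details else 0
    return {
        "finish_reason": details.finish_reason if details else None,
        "index": index,
        "model": model,
        "usage": {
            "completion_tokens": completion_tokens,
            "prompt_tokens": prompt_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    }
```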
6 changes: 3 additions & 3 deletions haystack/components/generators/hugging_face_api.py
@@ -8,7 +8,7 @@
from haystack.utils.hf import HFGenerationAPIType, HFModelType, check_valid_model
from haystack.utils.url_validation import is_valid_http_url

with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\"'") as huggingface_hub_import:
with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\"'") as huggingface_hub_import:
from huggingface_hub import (
InferenceClient,
TextGenerationOutput,
@@ -208,8 +208,8 @@ def _run_non_streaming(self, prompt: str, generation_kwargs: Dict[str, Any]):
meta = [
{
"model": self._client.model,
"finish_reason": tgr.details.finish_reason,
"usage": {"completion_tokens": len(tgr.details.tokens)},
"finish_reason": tgr.details.finish_reason if tgr.details else None,
"usage": {"completion_tokens": len(tgr.details.tokens) if tgr.details else 0},
}
]
return {"replies": [tgr.generated_text], "meta": meta}
17 changes: 12 additions & 5 deletions haystack/components/generators/hugging_face_tgi.py
@@ -9,7 +9,7 @@
from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable
from haystack.utils.hf import HFModelType, check_generation_params, check_valid_model, list_inference_deployed_models

with LazyImport(message="Run 'pip install \"huggingface_hub>=0.22.0\" transformers'") as transformers_import:
with LazyImport(message="Run 'pip install \"huggingface_hub>=0.23.0\" transformers'") as transformers_import:
from huggingface_hub import (
InferenceClient,
TextGenerationOutput,
@@ -57,7 +57,7 @@ class HuggingFaceTGIGenerator:
client = HuggingFaceTGIGenerator(model="mistralai/Mistral-7B-v0.1", token=Secret.from_token("<your-api-key>"))
client.warm_up()
response = client.run("What's Natural Language Processing?", max_new_tokens=120)
response = client.run("What's Natural Language Processing?", generation_kwargs={"max_new_tokens": 120})
print(response)
```
@@ -255,15 +255,22 @@ def _run_non_streaming(
all_metadata: List[Dict[str, Any]] = []
for _i in range(num_responses):
tgr: TextGenerationOutput = self.client.text_generation(prompt, details=True, **generation_kwargs)
if tgr.details:
completion_tokens = len(tgr.details.tokens)
prompt_token_count = prompt_token_count + completion_tokens
finish_reason = tgr.details.finish_reason
else:
finish_reason = None
completion_tokens = 0
all_metadata.append(
{
"model": self.client.model,
"index": _i,
"finish_reason": tgr.details.finish_reason,
"finish_reason": finish_reason,
"usage": {
"completion_tokens": len(tgr.details.tokens),
"completion_tokens": completion_tokens,
"prompt_tokens": prompt_token_count,
"total_tokens": prompt_token_count + len(tgr.details.tokens),
"total_tokens": prompt_token_count + completion_tokens,
},
}
)
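The docstring fix in this file reflects that sampling parameters go through `run`'s `generation_kwargs` dict rather than being passed as bare keyword arguments. A usage sketch assembled from the corrected docstring (placeholder token and model name as shown there):

```python
from haystack.components.generators import HuggingFaceTGIGenerator
from haystack.utils import Secret

client = HuggingFaceTGIGenerator(model="mistralai/Mistral-7B-v0.1", token=Secret.from_token("<your-api-key>"))
client.warm_up()
response = client.run("What's Natural Language Processing?", generation_kwargs={"max_new_tokens": 120})
print(response["replies"][0])
```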