Skip to content

Commit

Permalink
Merge pull request #141 from deepset-ai/update-to-gpt4omini
Browse files Browse the repository at this point in the history
replace `gpt-3.5-turbo` with `gpt-4o-mini`
  • Loading branch information
anakin87 authored Oct 4, 2024
2 parents ba1e9bf + 0eeb481 commit 186468c
Show file tree
Hide file tree
Showing 14 changed files with 24 additions and 24 deletions.
2 changes: 1 addition & 1 deletion notebooks/apify_haystack_instagram_comments_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@
"\n",
"cleaner = DocumentCleaner(remove_empty_lines=True, remove_extra_whitespaces=True, remove_repeated_substrings=True)\n",
"prompt_builder = PromptBuilder(template=prompt)\n",
"generator = OpenAIGenerator(model=\"gpt-3.5-turbo\")\n",
"generator = OpenAIGenerator(model=\"gpt-4o-mini\")\n",
"\n",
"\n",
"pipe = Pipeline()\n",
Expand Down
2 changes: 1 addition & 1 deletion notebooks/apify_haystack_rag.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@
"\n",
"text_embedder = OpenAITextEmbedder()\n",
"retriever = InMemoryEmbeddingRetriever(document_store)\n",
"generator = OpenAIGenerator(model=\"gpt-3.5-turbo\")\n",
"generator = OpenAIGenerator(model=\"gpt-4o-mini\")\n",
"\n",
"template = \"\"\"\n",
"Given the following information, answer the question.\n",
Expand Down
4 changes: 2 additions & 2 deletions notebooks/astradb_haystack_integration.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'answer_builder': {'answers': [GeneratedAnswer(data='There are over 7,000 languages spoken around the world today.', query='How many languages are there in the world today?', documents=[Document(id=cfe93bc1c274908801e6670440bf2bbba54fad792770d57421f85ffa2a4fcc94, content: 'There are over 7,000 languages spoken around the world today.', score: 0.9267925, embedding: vector of size 384), Document(id=6f20658aeac3c102495b198401c1c0c2bd71d77b915820304d4fbc324b2f3cdb, content: 'Elephants have been observed to behave in a way that indicates a high level of self-awareness, such ...', score: 0.5357444, embedding: vector of size 384)], meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 14, 'prompt_tokens': 83, 'total_tokens': 97}})]}}\n"
"{'answer_builder': {'answers': [GeneratedAnswer(data='There are over 7,000 languages spoken around the world today.', query='How many languages are there in the world today?', documents=[Document(id=cfe93bc1c274908801e6670440bf2bbba54fad792770d57421f85ffa2a4fcc94, content: 'There are over 7,000 languages spoken around the world today.', score: 0.9267925, embedding: vector of size 384), Document(id=6f20658aeac3c102495b198401c1c0c2bd71d77b915820304d4fbc324b2f3cdb, content: 'Elephants have been observed to behave in a way that indicates a high level of self-awareness, such ...', score: 0.5357444, embedding: vector of size 384)], meta={'model': 'gpt-4o-mini-2024-07-18', 'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 14, 'prompt_tokens': 83, 'total_tokens': 97}})]}}\n"
]
}
],
Expand Down Expand Up @@ -316,7 +316,7 @@
"source": [
"The output should be something like this:\n",
"```bash\n",
"{'answer_builder': {'answers': [GeneratedAnswer(data='There are over 7,000 languages spoken around the world today.', query='How many languages are there in the world today?', documents=[Document(id=cfe93bc1c274908801e6670440bf2bbba54fad792770d57421f85ffa2a4fcc94, content: 'There are over 7,000 languages spoken around the world today.', score: 0.9267925, embedding: vector of size 384), Document(id=6f20658aeac3c102495b198401c1c0c2bd71d77b915820304d4fbc324b2f3cdb, content: 'Elephants have been observed to behave in a way that indicates a high level of self-awareness, such ...', score: 0.5357444, embedding: vector of size 384)], meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 14, 'prompt_tokens': 83, 'total_tokens': 97}})]}}\n",
"{'answer_builder': {'answers': [GeneratedAnswer(data='There are over 7,000 languages spoken around the world today.', query='How many languages are there in the world today?', documents=[Document(id=cfe93bc1c274908801e6670440bf2bbba54fad792770d57421f85ffa2a4fcc94, content: 'There are over 7,000 languages spoken around the world today.', score: 0.9267925, embedding: vector of size 384), Document(id=6f20658aeac3c102495b198401c1c0c2bd71d77b915820304d4fbc324b2f3cdb, content: 'Elephants have been observed to behave in a way that indicates a high level of self-awareness, such ...', score: 0.5357444, embedding: vector of size 384)], meta={'model': 'gpt-4o-mini-2024-07-18', 'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 14, 'prompt_tokens': 83, 'total_tokens': 97}})]}}\n",
"```\n",
"\n",
"Now you understand how to use AstraDB as a data source for your Haystack pipeline. Thanks for reading! To learn more about Haystack, [join us on Discord](https://discord.gg/QMP5jgMH) or [sign up for our Monthly newsletter](https://landing.deepset.ai/haystack-community-updates?utm_campaign=developer-relations&utm_source=astradb-haystack-notebook)."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@
" \"\"\"\n",
" self.pipeline = Pipeline()\n",
" self.pipeline.add_component(name=\"builder\", instance=PromptBuilder(prompt))\n",
" self.pipeline.add_component(name=\"llm\", instance=OpenAIGenerator(model=\"gpt-3.5-turbo\"))\n",
" self.pipeline.add_component(name=\"llm\", instance=OpenAIGenerator(model=\"gpt-4o-mini\"))\n",
" self.pipeline.connect(\"builder\", \"llm\")\n",
"\n",
" @component.output_types(filters=Dict[str, str])\n",
Expand Down
4 changes: 2 additions & 2 deletions notebooks/function_calling_with_OpenAIChatGenerator.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'replies': [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence that deals with the interaction between computers and humans in natural language. It focuses on the understanding, interpretation, and generation of human language to enable machines to process and analyze textual data efficiently.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 50, 'prompt_tokens': 16, 'total_tokens': 66}})]}\n"
"{'replies': [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence that deals with the interaction between computers and humans in natural language. It focuses on the understanding, interpretation, and generation of human language to enable machines to process and analyze textual data efficiently.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-4o-mini-2024-07-18', 'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 50, 'prompt_tokens': 16, 'total_tokens': 66}})]}\n"
]
}
],
Expand Down Expand Up @@ -300,7 +300,7 @@
{
"data": {
"text/plain": [
"{'replies': [ChatMessage(content='[{\"index\": 0, \"id\": \"call_fFQKCAUba8RRu2BZ4v8IVYPH\", \"function\": {\"arguments\": \"{\\\\n \\\\\"location\\\\\": \\\\\"Berlin\\\\\",\\\\n \\\\\"unit\\\\\": \\\\\"celsius\\\\\"\\\\n}\", \"name\": \"get_current_weather\"}, \"type\": \"function\"}]', role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'tool_calls', 'usage': {}})]}"
"{'replies': [ChatMessage(content='[{\"index\": 0, \"id\": \"call_fFQKCAUba8RRu2BZ4v8IVYPH\", \"function\": {\"arguments\": \"{\\\\n \\\\\"location\\\\\": \\\\\"Berlin\\\\\",\\\\n \\\\\"unit\\\\\": \\\\\"celsius\\\\\"\\\\n}\", \"name\": \"get_current_weather\"}, \"type\": \"function\"}]', role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-4o-mini-2024-07-18', 'index': 0, 'finish_reason': 'tool_calls', 'usage': {}})]}"
]
},
"execution_count": 8,
Expand Down
2 changes: 1 addition & 1 deletion notebooks/model_explorer_streaming.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,7 @@
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
},
"text/plain": "'Model name: gpt-3.5-turbo'"
"text/plain": "'Model name: gpt-4o-mini'"
},
"metadata": {},
"output_type": "display_data"
Expand Down
4 changes: 2 additions & 2 deletions notebooks/openapitool.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@
{
"data": {
"text/plain": [
"{'llm': {'meta': [{'model': 'gpt-3.5-turbo-0125',\n",
"{'llm': {'meta': [{'model': 'gpt-4o-mini-2024-07-18',\n",
" 'index': 0,\n",
" 'finish_reason': 'stop',\n",
" 'usage': {'completion_tokens': 23,\n",
Expand Down Expand Up @@ -522,7 +522,7 @@
{
"data": {
"text/plain": [
"{'llm': {'meta': [{'model': 'gpt-3.5-turbo-0125',\n",
"{'llm': {'meta': [{'model': 'gpt-4o-mini-2024-07-18',\n",
" 'index': 0,\n",
" 'finish_reason': 'stop',\n",
" 'usage': {'completion_tokens': 26,\n",
Expand Down
6 changes: 3 additions & 3 deletions notebooks/prompt_optimization_with_dspy.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@
"\n",
"\n",
"retriever = InMemoryBM25Retriever(document_store, top_k=3)\n",
"generator = OpenAIGenerator(model=\"gpt-3.5-turbo\")\n",
"generator = OpenAIGenerator(model=\"gpt-4o-mini\")\n",
"\n",
"template = \"\"\"\n",
"Given the following information, answer the question.\n",
Expand Down Expand Up @@ -613,7 +613,7 @@
"from dspy.primitives.prediction import Prediction\n",
"\n",
"\n",
"lm = dspy.OpenAI(model='gpt-3.5-turbo')\n",
"lm = dspy.OpenAI(model='gpt-4o-mini')\n",
"dspy.settings.configure(lm=lm)"
]
},
Expand Down Expand Up @@ -5006,7 +5006,7 @@
"new_prompt_builder = PromptBuilder(template=template)\n",
"\n",
"new_retriever = InMemoryBM25Retriever(document_store, top_k=3)\n",
"new_generator = OpenAIGenerator(model=\"gpt-3.5-turbo\")\n",
"new_generator = OpenAIGenerator(model=\"gpt-4o-mini\")\n",
"\n",
"answer_builder = AnswerBuilder(pattern=\"Answer: (.*)\")\n",
"\n",
Expand Down
6 changes: 3 additions & 3 deletions notebooks/query-expansion.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
"@component\n",
"class QueryExpander:\n",
"\n",
" def __init__(self, prompt: Optional[str] = None, model: str = \"gpt-3.5-turbo\"):\n",
" def __init__(self, prompt: Optional[str] = None, model: str = \"gpt-4o-mini\"):\n",
"\n",
" self.query_expansion_prompt = prompt\n",
" self.model = model\n",
Expand Down Expand Up @@ -483,7 +483,7 @@
"data": {
"text/plain": [
"{'llm': {'replies': ['Green energy sources refer to energy derived from renewable natural resources that are replenished over time. Wind power, specifically, is a type of green energy that harnesses wind energy to generate useful work. These sources contrast with fossil fuels like coal, oil, and natural gas, which are not considered green energy due to their carbon content. (Sources: Renewable energy - Wikipedia, Wind power - Wikipedia)'],\n",
" 'meta': [{'model': 'gpt-3.5-turbo-0125',\n",
" 'meta': [{'model': 'gpt-4o-mini-2024-07-18',\n",
" 'index': 0,\n",
" 'finish_reason': 'stop',\n",
" 'usage': {'completion_tokens': 79,\n",
Expand Down Expand Up @@ -613,7 +613,7 @@
"data": {
"text/plain": [
"{'llm': {'replies': ['Green energy sources refer to renewable energy from natural resources like wind power and electric vehicles that use electric motors for propulsion. This type of energy is sustainable and replenished over time, unlike fossil fuels or nuclear power. [Source: Wikipedia]'],\n",
" 'meta': [{'model': 'gpt-3.5-turbo-0125',\n",
" 'meta': [{'model': 'gpt-4o-mini-2024-07-18',\n",
" 'index': 0,\n",
" 'finish_reason': 'stop',\n",
" 'usage': {'completion_tokens': 47,\n",
Expand Down
2 changes: 1 addition & 1 deletion notebooks/rag_eval_deep_eval.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@
"\"\"\"\n",
"\n",
"prompt_builder = PromptBuilder(template=template)\n",
"generator = OpenAIGenerator(model=\"gpt-3.5-turbo-0125\")"
"generator = OpenAIGenerator(model=\"gpt-4o-mini-2024-07-18\")"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions notebooks/rag_eval_harness.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,7 @@
" )\n",
" pipeline.add_component(\"prompt_builder\", PromptBuilder(template=template))\n",
" pipeline.add_component(\n",
" \"generator\", OpenAIGenerator(model=\"gpt-3.5-turbo\")\n",
" \"generator\", OpenAIGenerator(model=\"gpt-4o-mini\")\n",
" )\n",
" pipeline.add_component(\"answer_builder\", AnswerBuilder())\n",
"\n",
Expand Down Expand Up @@ -1046,7 +1046,7 @@
" )\n",
" pipeline.add_component(\"prompt_builder\", PromptBuilder(template=template))\n",
" pipeline.add_component(\n",
" \"generator\", OpenAIGenerator(model=\"gpt-3.5-turbo\")\n",
" \"generator\", OpenAIGenerator(model=\"gpt-4o-mini\")\n",
" )\n",
" pipeline.add_component(\"answer_builder\", AnswerBuilder())\n",
"\n",
Expand Down
2 changes: 1 addition & 1 deletion notebooks/rag_eval_ragas.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@
"\n",
"prompt_builder = PromptBuilder(template=template)\n",
"\n",
"generator = OpenAIGenerator(model=\"gpt-3.5-turbo-0125\")"
"generator = OpenAIGenerator(model=\"gpt-4o-mini-2024-07-18\")"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion notebooks/rag_eval_uptrain.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@
"\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass(\"Enter OpenAI API key:\")\n",
"generator = OpenAIGenerator(model=\"gpt-3.5-turbo-0125\")"
"generator = OpenAIGenerator(model=\"gpt-4o-mini-2024-07-18\")"
]
},
{
Expand Down
6 changes: 3 additions & 3 deletions notebooks/using_hyde_for_improved_retrieval.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
"from haystack.components.builders import PromptBuilder\n",
"\n",
"generator = OpenAIGenerator(\n",
" model=\"gpt-3.5-turbo\",\n",
" model=\"gpt-4o-mini\",\n",
" generation_kwargs={\"n\": 5, \"temperature\": 0.75, \"max_tokens\": 400},\n",
")\n",
"\n",
Expand Down Expand Up @@ -269,7 +269,7 @@
"\n",
" def __init__(\n",
" self,\n",
" instruct_llm: str = \"gpt-3.5-turbo\",\n",
" instruct_llm: str = \"gpt-4o-mini\",\n",
" instruct_llm_api_key: Secret = Secret.from_env_var(\"OPENAI_API_KEY\"),\n",
" nr_completions: int = 5,\n",
" embedder_model: str = \"sentence-transformers/all-MiniLM-L6-v2\",\n",
Expand Down Expand Up @@ -464,7 +464,7 @@
"from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever\n",
"\n",
"def retriever_with_hyde(doc_store):\n",
" hyde = HypotheticalDocumentEmbedder(instruct_llm=\"gpt-3.5-turbo\", nr_completions=5)\n",
" hyde = HypotheticalDocumentEmbedder(instruct_llm=\"gpt-4o-mini\", nr_completions=5)\n",
" retriever = InMemoryEmbeddingRetriever(document_store=doc_store)\n",
"\n",
" retrieval_pipeline = Pipeline()\n",
Expand Down

0 comments on commit 186468c

Please sign in to comment.