Fix prefix removal when streaming inline completions (#879)
* Fix removal of prefix when streaming

* Fix typo

Co-authored-by: david qiu <[email protected]>
krassowski and dlqqq authored Jul 8, 2024
1 parent 4664a0b commit ab38fe7
Showing 2 changed files with 40 additions and 16 deletions.
17 changes: 6 additions & 11 deletions packages/jupyter-ai-magics/jupyter_ai_magics/providers.py
@@ -485,7 +485,7 @@ async def stream_inline_completions(
         chain = self._create_completion_chain()
         token = completion.token_from_request(request, 0)
         model_arguments = completion.template_inputs_from_request(request)
-        suggestion = ""
+        suggestion = processed_suggestion = ""
 
         # send an incomplete `InlineCompletionReply`, indicating to the
         # client that LLM output is about to streamed across this connection.
@@ -505,25 +505,20 @@

         async for fragment in chain.astream(input=model_arguments):
             suggestion += fragment
-            if suggestion.startswith("```"):
-                if "\n" not in suggestion:
-                    # we are not ready to apply post-processing
-                    continue
-                else:
-                    suggestion = completion.post_process_suggestion(suggestion, request)
-            elif suggestion.rstrip().endswith("```"):
-                suggestion = completion.post_process_suggestion(suggestion, request)
+            processed_suggestion = completion.post_process_suggestion(
+                suggestion, request
+            )
             yield InlineCompletionStreamChunk(
                 type="stream",
-                response={"insertText": suggestion, "token": token},
+                response={"insertText": processed_suggestion, "token": token},
                 reply_to=request.number,
                 done=False,
             )
 
         # finally, send a message confirming that we are done
         yield InlineCompletionStreamChunk(
             type="stream",
-            response={"insertText": suggestion, "token": token},
+            response={"insertText": processed_suggestion, "token": token},
             reply_to=request.number,
             done=True,
         )
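For context on the change above: the old loop only ran post-processing when the accumulated text started or ended with a code fence, and it wrote the result back into suggestion, the same variable that accumulates streamed fragments. The new code always derives a separate processed_suggestion from the raw accumulation, which is what keeps fence and prefix removal correct while streaming. A minimal sketch of one way the old pattern can go wrong, using a toy post_process stand-in rather than the real completion.post_process_suggestion:

def post_process(text):
    # Hypothetical stand-in for completion.post_process_suggestion:
    # strip a leading markdown fence line and a trailing ``` fence.
    if text.startswith("```"):
        text = text.split("\n", 1)[1] if "\n" in text else ""
    if text.rstrip().endswith("```"):
        text = text.rstrip()[:-3]
    return text

fragments = ["```py", "thon\nprint(", "'hi')\n```"]

# Old pattern: post-processing overwrites the accumulator, so the next
# fragment is appended to an already-stripped string.
suggestion = ""
for fragment in fragments:
    suggestion += fragment
    suggestion = post_process(suggestion)
print(repr(suggestion))  # "thon\nprint('hi')\n" -- part of the fence leaks through

# New pattern (as in this commit): accumulate raw text, post-process a copy.
suggestion = processed_suggestion = ""
for fragment in fragments:
    suggestion += fragment
    processed_suggestion = post_process(suggestion)
print(repr(processed_suggestion))  # "print('hi')\n"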
39 changes: 34 additions & 5 deletions packages/jupyter-ai/jupyter_ai/tests/completions/test_handlers.py
@@ -98,13 +98,16 @@ async def test_handle_request(inline_handler):
     assert suggestions[0].insertText == "Test response"
 
 
+expected_suggestions_cases = [
+    ("```python\nTest python code\n```", "Test python code"),
+    ("```\ntest\n```\n \n", "test"),
+    ("```hello```world```", "hello```world"),
+]
+
+
 @pytest.mark.parametrize(
     "response,expected_suggestion",
-    [
-        ("```python\nTest python code\n```", "Test python code"),
-        ("```\ntest\n```\n \n", "test"),
-        ("```hello```world```", "hello```world"),
-    ],
+    expected_suggestions_cases,
 )
 async def test_handle_request_with_spurious_fragments(response, expected_suggestion):
     inline_handler = MockCompletionHandler(
@@ -128,6 +131,32 @@ async def test_handle_request_with_spurious_fragments(response, expected_suggestion):
     assert suggestions[0].insertText == expected_suggestion
 
 
+@pytest.mark.parametrize(
+    "response,expected_suggestion",
+    expected_suggestions_cases,
+)
+async def test_handle_request_with_spurious_fragments_stream(
+    response, expected_suggestion
+):
+    inline_handler = MockCompletionHandler(
+        lm_provider=MockProvider,
+        lm_provider_params={
+            "model_id": "model",
+            "responses": [response],
+        },
+    )
+    dummy_request = InlineCompletionRequest(
+        number=1, prefix="", suffix="", mime="", stream=True
+    )
+
+    await inline_handler.handle_stream_request(dummy_request)
+    assert len(inline_handler.messages) == 3
+    # the streamed fragment should not include spurious fragments
+    assert inline_handler.messages[1].response.insertText == expected_suggestion
+    # the final state should not include spurious fragments either
+    assert inline_handler.messages[2].response.insertText == expected_suggestion
+
+
 async def test_handle_stream_request():
     inline_handler = MockCompletionHandler(
         lm_provider=MockProvider,
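To run the new streaming test locally, something along these lines should work from a development checkout, assuming pytest and the project's test dependencies are installed (the -k filter is just a convenience, not part of the change):

pytest packages/jupyter-ai/jupyter_ai/tests/completions/test_handlers.py -k spurious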
