From 83cbd8ea240f1429766c417bada3bfb39afc4462 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Krassowski?=
 <5832902+krassowski@users.noreply.github.com>
Date: Tue, 10 Sep 2024 21:37:13 +0100
Subject: [PATCH 1/8] Allow unlimited LLM memory through traitlets
 configuration (#986)

---
 packages/jupyter-ai/jupyter_ai/extension.py   |  6 +++
 packages/jupyter-ai/jupyter_ai/history.py     |  8 ++--
 .../jupyter_ai/tests/test_extension.py        | 48 +++++++++++++++++++
 3 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/packages/jupyter-ai/jupyter_ai/extension.py b/packages/jupyter-ai/jupyter_ai/extension.py
index a57fb0eb9..78c4dfefa 100644
--- a/packages/jupyter-ai/jupyter_ai/extension.py
+++ b/packages/jupyter-ai/jupyter_ai/extension.py
@@ -188,7 +188,13 @@ class AiExtension(ExtensionApp):
         default_value=2,
         help="""
         Number of chat interactions to keep in the conversational memory object.
+
+        An interaction is defined as an exchange between a human and AI, thus
+        comprising of one or two messages.
+
+        Set to `None` to keep all interactions.
         """,
+        allow_none=True,
         config=True,
     )
 
diff --git a/packages/jupyter-ai/jupyter_ai/history.py b/packages/jupyter-ai/jupyter_ai/history.py
index 9e1064194..c857b4486 100644
--- a/packages/jupyter-ai/jupyter_ai/history.py
+++ b/packages/jupyter-ai/jupyter_ai/history.py
@@ -1,5 +1,5 @@
 import time
-from typing import List, Optional, Sequence, Set
+from typing import List, Optional, Sequence, Set, Union
 
 from langchain_core.chat_history import BaseChatMessageHistory
 from langchain_core.messages import BaseMessage
@@ -16,16 +16,18 @@ class BoundedChatHistory(BaseChatMessageHistory, BaseModel):
     `k` exchanges between a user and an LLM.
 
     For example, when `k=2`, `BoundedChatHistory` will store up to 2 human
-    messages and 2 AI messages.
+    messages and 2 AI messages. If `k` is set to `None` all messages are kept.
     """
 
-    k: int
+    k: Union[int, None]
     clear_time: float = 0.0
     cleared_msgs: Set[str] = set()
     _all_messages: List[BaseMessage] = PrivateAttr(default_factory=list)
 
     @property
     def messages(self) -> List[BaseMessage]:
+        if self.k is None:
+            return self._all_messages
         return self._all_messages[-self.k * 2 :]
 
     async def aget_messages(self) -> List[BaseMessage]:
diff --git a/packages/jupyter-ai/jupyter_ai/tests/test_extension.py b/packages/jupyter-ai/jupyter_ai/tests/test_extension.py
index b46c148c3..9ae52d8a0 100644
--- a/packages/jupyter-ai/jupyter_ai/tests/test_extension.py
+++ b/packages/jupyter-ai/jupyter_ai/tests/test_extension.py
@@ -1,7 +1,12 @@
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
+from unittest import mock
+
 import pytest
 from jupyter_ai.extension import AiExtension
+from jupyter_ai.history import HUMAN_MSG_ID_KEY
+from jupyter_ai_magics import BaseProvider
+from langchain_core.messages import BaseMessage
 
 pytest_plugins = ["pytest_jupyter.jupyter_server"]
 
@@ -43,3 +48,46 @@ def test_blocks_providers(argv, jp_configurable_serverapp):
     ai._link_jupyter_server_extension(server)
     ai.initialize_settings()
     assert KNOWN_LM_A not in ai.settings["lm_providers"]
+
+
+@pytest.fixture
+def jp_server_config(jp_server_config):
+    # Disable the extension during server startup to avoid double initialization
+    return {"ServerApp": {"jpserver_extensions": {"jupyter_ai": False}}}
+
+
+@pytest.fixture
+def ai_extension(jp_serverapp):
+    ai = AiExtension()
+    # `BaseProvider.server_settings` can be only initialized once; however, the tests
+    # may run in parallel setting it with race condition; because we are not testing
+    # the `BaseProvider.server_settings` here, we can just mock the setter
+    settings_mock = mock.PropertyMock()
+    with mock.patch.object(BaseProvider.__class__, "server_settings", settings_mock):
+        yield ai
+
+
+@pytest.mark.parametrize(
+    "max_history,messages_to_add,expected_size",
+    [
+        # for max_history = 1 we expect to see up to 2 messages (1 human and 1 AI message)
+        (1, 4, 2),
+        # if there is less than `max_history` messages, all should be returned
+        (1, 1, 1),
+        # if no limit is set, all messages should be returned
+        (None, 9, 9),
+    ],
+)
+def test_max_chat_history(ai_extension, max_history, messages_to_add, expected_size):
+    ai = ai_extension
+    ai.default_max_chat_history = max_history
+    ai.initialize_settings()
+    for i in range(messages_to_add):
+        message = BaseMessage(
+            content=f"Test message {i}",
+            type="test",
+            additional_kwargs={HUMAN_MSG_ID_KEY: f"message-{i}"},
+        )
+        ai.settings["llm_chat_memory"].add_message(message)
+
+    assert len(ai.settings["llm_chat_memory"].messages) == expected_size

From db53228bc863eb439b5bdd749bc9e8bf6781245b Mon Sep 17 00:00:00 2001
From: dlqqq <dlqqq@users.noreply.github.com>
Date: Wed, 11 Sep 2024 18:47:15 +0000
Subject: [PATCH 2/8] Publish 2.23.0

SHA256 hashes:

jupyter-ai-core-2.23.0.tgz: 6411e535248f1ac765f627f7a3aef68cbdb04f35b7c62ea56aef74f872514f91

jupyter_ai-2.23.0-py3-none-any.whl: 894c54f03654505ed6c59012532eac05f25ad659693483d290283d38b01fb113

jupyter_ai-2.23.0.tar.gz: 486a1bb744a776befc4fc2b633b7626bce1b356742692a6d6d8c673514a35436

jupyter_ai_magics-2.23.0-py3-none-any.whl: a0789f35e9b88df30b96305f263e27163990381fd64ebacfe4babc78fb03ebdc

jupyter_ai_magics-2.23.0.tar.gz: 3237f744c4be2e5a1b48c6e6619b0e34158a8ab5eb45a62d6372d36b69b5be84
---
 CHANGELOG.md                            | 25 +++++++++++++++++++++++--
 lerna.json                              |  2 +-
 package.json                            |  2 +-
 packages/jupyter-ai-magics/package.json |  2 +-
 packages/jupyter-ai-test/package.json   |  2 +-
 packages/jupyter-ai/package.json        |  2 +-
 6 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d1e7b9c9d..0f614d7ef 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,29 @@
 
 <!-- <START NEW CHANGELOG ENTRY> -->
 
+## 2.23.0
+
+([Full Changelog](https://github.com/jupyterlab/jupyter-ai/compare/@jupyter-ai/core@2.22.0...83cbd8ea240f1429766c417bada3bfb39afc4462))
+
+### Enhancements made
+
+- Allow unlimited LLM memory through traitlets configuration [#986](https://github.com/jupyterlab/jupyter-ai/pull/986) ([@krassowski](https://github.com/krassowski))
+- Allow to disable automatic inline completions [#981](https://github.com/jupyterlab/jupyter-ai/pull/981) ([@krassowski](https://github.com/krassowski))
+- Add ability to delete messages + start new chat session [#951](https://github.com/jupyterlab/jupyter-ai/pull/951) ([@michaelchia](https://github.com/michaelchia))
+
+### Bugs fixed
+
+- Fix `RunnableWithMessageHistory` import [#980](https://github.com/jupyterlab/jupyter-ai/pull/980) ([@krassowski](https://github.com/krassowski))
+- Fix sort messages [#975](https://github.com/jupyterlab/jupyter-ai/pull/975) ([@michaelchia](https://github.com/michaelchia))
+
+### Contributors to this release
+
+([GitHub contributors page for this release](https://github.com/jupyterlab/jupyter-ai/graphs/contributors?from=2024-08-29&to=2024-09-11&type=c))
+
+[@dlqqq](https://github.com/search?q=repo%3Ajupyterlab%2Fjupyter-ai+involves%3Adlqqq+updated%3A2024-08-29..2024-09-11&type=Issues) | [@krassowski](https://github.com/search?q=repo%3Ajupyterlab%2Fjupyter-ai+involves%3Akrassowski+updated%3A2024-08-29..2024-09-11&type=Issues) | [@michaelchia](https://github.com/search?q=repo%3Ajupyterlab%2Fjupyter-ai+involves%3Amichaelchia+updated%3A2024-08-29..2024-09-11&type=Issues) | [@srdas](https://github.com/search?q=repo%3Ajupyterlab%2Fjupyter-ai+involves%3Asrdas+updated%3A2024-08-29..2024-09-11&type=Issues)
+
+<!-- <END NEW CHANGELOG ENTRY> -->
+
 ## 2.22.0
 
 ([Full Changelog](https://github.com/jupyterlab/jupyter-ai/compare/@jupyter-ai/core@2.21.0...79158abf7044605c5776e205dd171fe87fb64142))
@@ -25,8 +48,6 @@
 
 [@dlqqq](https://github.com/search?q=repo%3Ajupyterlab%2Fjupyter-ai+involves%3Adlqqq+updated%3A2024-08-19..2024-08-29&type=Issues) | [@krassowski](https://github.com/search?q=repo%3Ajupyterlab%2Fjupyter-ai+involves%3Akrassowski+updated%3A2024-08-19..2024-08-29&type=Issues) | [@michaelchia](https://github.com/search?q=repo%3Ajupyterlab%2Fjupyter-ai+involves%3Amichaelchia+updated%3A2024-08-19..2024-08-29&type=Issues) | [@pre-commit-ci](https://github.com/search?q=repo%3Ajupyterlab%2Fjupyter-ai+involves%3Apre-commit-ci+updated%3A2024-08-19..2024-08-29&type=Issues) | [@srdas](https://github.com/search?q=repo%3Ajupyterlab%2Fjupyter-ai+involves%3Asrdas+updated%3A2024-08-19..2024-08-29&type=Issues) | [@trducng](https://github.com/search?q=repo%3Ajupyterlab%2Fjupyter-ai+involves%3Atrducng+updated%3A2024-08-19..2024-08-29&type=Issues)
 
-<!-- <END NEW CHANGELOG ENTRY> -->
-
 ## 2.21.0
 
 ([Full Changelog](https://github.com/jupyterlab/jupyter-ai/compare/@jupyter-ai/core@2.20.0...83e368b9d04904f9eb0ad4b1f0759bf3b7bbc93d))
diff --git a/lerna.json b/lerna.json
index 5c20bb775..5c205293f 100644
--- a/lerna.json
+++ b/lerna.json
@@ -1,7 +1,7 @@
 {
   "$schema": "node_modules/lerna/schemas/lerna-schema.json",
   "useWorkspaces": true,
-  "version": "2.22.0",
+  "version": "2.23.0",
   "npmClient": "yarn",
   "useNx": true
 }
diff --git a/package.json b/package.json
index 7f9950ded..a9fd1da7e 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@jupyter-ai/monorepo",
-  "version": "2.22.0",
+  "version": "2.23.0",
   "description": "A generative AI extension for JupyterLab",
   "private": true,
   "keywords": [
diff --git a/packages/jupyter-ai-magics/package.json b/packages/jupyter-ai-magics/package.json
index c22c4e1cc..05f9cea6b 100644
--- a/packages/jupyter-ai-magics/package.json
+++ b/packages/jupyter-ai-magics/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@jupyter-ai/magics",
-  "version": "2.22.0",
+  "version": "2.23.0",
   "description": "Jupyter AI magics Python package. Not published on NPM.",
   "private": true,
   "homepage": "https://github.com/jupyterlab/jupyter-ai",
diff --git a/packages/jupyter-ai-test/package.json b/packages/jupyter-ai-test/package.json
index 1198570f1..916636af6 100644
--- a/packages/jupyter-ai-test/package.json
+++ b/packages/jupyter-ai-test/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@jupyter-ai/test",
-  "version": "2.22.0",
+  "version": "2.23.0",
   "description": "Jupyter AI test package. Not published on NPM or PyPI.",
   "private": true,
   "homepage": "https://github.com/jupyterlab/jupyter-ai",
diff --git a/packages/jupyter-ai/package.json b/packages/jupyter-ai/package.json
index 70cde8552..bd9a20a71 100644
--- a/packages/jupyter-ai/package.json
+++ b/packages/jupyter-ai/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@jupyter-ai/core",
-  "version": "2.22.0",
+  "version": "2.23.0",
   "description": "A generative AI extension for JupyterLab",
   "keywords": [
     "jupyter",

From cdee73d7ec65e2ecd4bbb98aaaecf101b83ec1e0 Mon Sep 17 00:00:00 2001
From: david qiu <david@qiu.dev>
Date: Thu, 12 Sep 2024 10:29:33 -0700
Subject: [PATCH 3/8] upgrade to actions/upload-artifact v4 (#992)

---
 .github/workflows/e2e-tests.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml
index 912bd1172..c6676c277 100644
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@@ -2,13 +2,13 @@ name: E2E Tests
 
 # suppress warning raised by https://github.com/jupyter/jupyter_core/pull/292
 env:
-  JUPYTER_PLATFORM_DIRS: '1'
+  JUPYTER_PLATFORM_DIRS: "1"
 
 on:
   push:
     branches: main
   pull_request:
-    branches: '*'
+    branches: "*"
 
 jobs:
   e2e-tests:
@@ -41,17 +41,17 @@ jobs:
             ${{ github.workspace }}/pw-browsers
           key: ${{ runner.os }}-${{ hashFiles('packages/jupyter-ai/ui-tests/yarn.lock') }}
 
-      - name: Install browser
+      - name: Install Chromium
         working-directory: packages/jupyter-ai/ui-tests
         run: jlpm install-chromium
 
-      - name: Execute e2e tests
+      - name: Run E2E tests
         working-directory: packages/jupyter-ai/ui-tests
         run: jlpm test
 
-      - name: Upload Playwright Test report
+      - name: Upload Playwright test report
         if: always()
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v4
         with:
           name: jupyter-ai-playwright-tests-linux
           path: |

From 7b4a708ad6e4cb0119102041bd86f98015bc4900 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Krassowski?=
 <5832902+krassowski@users.noreply.github.com>
Date: Thu, 12 Sep 2024 20:23:56 +0100
Subject: [PATCH 4/8] Run mypy on CI, fix or ignore typing issues (#987)

* Run mypy on CI

* Rename, add mypy to test deps

* Fix typing jupyter-ai codebase (mostly)

* Three more cases

* update deepmerge version specifier

---------

Co-authored-by: David L. Qiu <david@qiu.dev>
---
 .github/workflows/unit-tests.yml              | 23 ++++++++++++++--
 .../jupyter_ai_magics/models/completion.py    |  1 +
 .../jupyter_ai_magics/py.typed                |  0
 .../jupyter_ai/chat_handlers/ask.py           |  1 +
 .../jupyter_ai/chat_handlers/base.py          | 27 ++++++++++---------
 .../jupyter_ai/chat_handlers/default.py       |  7 ++---
 .../jupyter_ai/chat_handlers/fix.py           |  1 +
 .../jupyter_ai/chat_handlers/generate.py      |  3 ++-
 .../jupyter_ai/chat_handlers/learn.py         | 14 ++++++----
 .../jupyter-ai/jupyter_ai/config_manager.py   |  8 +++---
 .../jupyter_ai/document_loaders/directory.py  |  2 +-
 packages/jupyter-ai/jupyter_ai/extension.py   |  2 +-
 packages/jupyter-ai/jupyter_ai/handlers.py    | 15 +++++------
 packages/jupyter-ai/jupyter_ai/history.py     |  4 +--
 packages/jupyter-ai/jupyter_ai/models.py      | 18 ++++++-------
 packages/jupyter-ai/pyproject.toml            |  4 ++-
 pyproject.toml                                |  5 ++++
 17 files changed, 85 insertions(+), 50 deletions(-)
 create mode 100644 packages/jupyter-ai-magics/jupyter_ai_magics/py.typed

diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 518cd437c..05224180e 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -1,4 +1,4 @@
-name: Python Unit Tests
+name: Python Tests
 
 # suppress warning raised by https://github.com/jupyter/jupyter_core/pull/292
 env:
@@ -12,7 +12,7 @@ on:
 
 jobs:
   unit-tests:
-    name: Linux
+    name: Unit tests
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
@@ -28,3 +28,22 @@ jobs:
         run: |
           set -eux
           pytest -vv -r ap --cov jupyter_ai
+
+  typing-tests:
+    name: Typing test
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Base Setup
+        uses: jupyterlab/maintainer-tools/.github/actions/base-setup@v1
+
+      - name: Install extension dependencies and build the extension
+        run: ./scripts/install.sh
+
+      - name: Run mypy
+        run: |
+          set -eux
+          mypy --version
+          mypy packages/jupyter-ai
diff --git a/packages/jupyter-ai-magics/jupyter_ai_magics/models/completion.py b/packages/jupyter-ai-magics/jupyter_ai_magics/models/completion.py
index 147f6ceec..f2ee0cd54 100644
--- a/packages/jupyter-ai-magics/jupyter_ai_magics/models/completion.py
+++ b/packages/jupyter-ai-magics/jupyter_ai_magics/models/completion.py
@@ -43,6 +43,7 @@ class InlineCompletionItem(BaseModel):
 
 class CompletionError(BaseModel):
     type: str
+    title: str
     traceback: str
 
 
diff --git a/packages/jupyter-ai-magics/jupyter_ai_magics/py.typed b/packages/jupyter-ai-magics/jupyter_ai_magics/py.typed
new file mode 100644
index 000000000..e69de29bb
diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/ask.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/ask.py
index 5c3026685..2da303e5a 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/ask.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/ask.py
@@ -72,6 +72,7 @@ async def process_message(self, message: HumanChatMessage):
 
         try:
             with self.pending("Searching learned documents", message):
+                assert self.llm_chain
                 result = await self.llm_chain.acall({"question": query})
                 response = result["answer"]
             self.reply(response, message)
diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py
index b97015518..347bfbf83 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py
@@ -13,6 +13,7 @@
     Optional,
     Type,
     Union,
+    cast,
 )
 from uuid import uuid4
 
@@ -28,6 +29,7 @@
 )
 from jupyter_ai_magics import Persona
 from jupyter_ai_magics.providers import BaseProvider
+from langchain.chains import LLMChain
 from langchain.pydantic_v1 import BaseModel
 
 if TYPE_CHECKING:
@@ -36,8 +38,8 @@
     from langchain_core.chat_history import BaseChatMessageHistory
 
 
-def get_preferred_dir(root_dir: str, preferred_dir: str) -> Optional[str]:
-    if preferred_dir != "":
+def get_preferred_dir(root_dir: str, preferred_dir: Optional[str]) -> Optional[str]:
+    if preferred_dir is not None and preferred_dir != "":
         preferred_dir = os.path.expanduser(preferred_dir)
         if not preferred_dir.startswith(root_dir):
             preferred_dir = os.path.join(root_dir, preferred_dir)
@@ -47,7 +49,7 @@ def get_preferred_dir(root_dir: str, preferred_dir: str) -> Optional[str]:
 
 # Chat handler type, with specific attributes for each
 class HandlerRoutingType(BaseModel):
-    routing_method: ClassVar[Union[Literal["slash_command"]]] = ...
+    routing_method: ClassVar[Union[Literal["slash_command"]]]
     """The routing method that sends commands to this handler."""
 
 
@@ -83,17 +85,17 @@ class BaseChatHandler:
     multiple chat handler classes."""
 
     # Class attributes
-    id: ClassVar[str] = ...
+    id: ClassVar[str]
     """ID for this chat handler; should be unique"""
 
-    name: ClassVar[str] = ...
+    name: ClassVar[str]
     """User-facing name of this handler"""
 
-    help: ClassVar[str] = ...
+    help: ClassVar[str]
     """What this chat handler does, which third-party models it contacts,
     the data it returns to the user, and so on, for display in the UI."""
 
-    routing_type: ClassVar[HandlerRoutingType] = ...
+    routing_type: ClassVar[HandlerRoutingType]
 
     uses_llm: ClassVar[bool] = True
     """Class attribute specifying whether this chat handler uses the LLM
@@ -153,9 +155,9 @@ def __init__(
         self.help_message_template = help_message_template
         self.chat_handlers = chat_handlers
 
-        self.llm = None
-        self.llm_params = None
-        self.llm_chain = None
+        self.llm: Optional[BaseProvider] = None
+        self.llm_params: Optional[dict] = None
+        self.llm_chain: Optional[LLMChain] = None
 
     async def on_message(self, message: HumanChatMessage):
         """
@@ -168,9 +170,8 @@ async def on_message(self, message: HumanChatMessage):
 
         # ensure the current slash command is supported
         if self.routing_type.routing_method == "slash_command":
-            slash_command = (
-                "/" + self.routing_type.slash_id if self.routing_type.slash_id else ""
-            )
+            routing_type = cast(SlashCommandRoutingType, self.routing_type)
+            slash_command = "/" + routing_type.slash_id if routing_type.slash_id else ""
             if slash_command in lm_provider_klass.unsupported_slash_commands:
                 self.reply(
                     "Sorry, the selected language model does not support this slash command."
diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/default.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/default.py
index 3e8f194b3..2bf88bafe 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/default.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/default.py
@@ -41,10 +41,10 @@ def create_llm_chain(
         prompt_template = llm.get_chat_prompt_template()
         self.llm = llm
 
-        runnable = prompt_template | llm
+        runnable = prompt_template | llm  # type:ignore
         if not llm.manages_history:
             runnable = RunnableWithMessageHistory(
-                runnable=runnable,
+                runnable=runnable,  #  type:ignore[arg-type]
                 get_session_history=self.get_llm_chat_memory,
                 input_messages_key="input",
                 history_messages_key="history",
@@ -106,6 +106,7 @@ async def process_message(self, message: HumanChatMessage):
             # stream response in chunks. this works even if a provider does not
             # implement streaming, as `astream()` defaults to yielding `_call()`
             # when `_stream()` is not implemented on the LLM class.
+            assert self.llm_chain
             async for chunk in self.llm_chain.astream(
                 {"input": message.body},
                 config={"configurable": {"last_human_msg": message}},
@@ -117,7 +118,7 @@ async def process_message(self, message: HumanChatMessage):
                     stream_id = self._start_stream(human_msg=message)
                     received_first_chunk = True
 
-                if isinstance(chunk, AIMessageChunk):
+                if isinstance(chunk, AIMessageChunk) and isinstance(chunk.content, str):
                     self._send_stream_chunk(stream_id, chunk.content)
                 elif isinstance(chunk, str):
                     self._send_stream_chunk(stream_id, chunk)
diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/fix.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/fix.py
index d6ecc6d81..1056e592c 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/fix.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/fix.py
@@ -93,6 +93,7 @@ async def process_message(self, message: HumanChatMessage):
 
         self.get_llm_chain()
         with self.pending("Analyzing error", message):
+            assert self.llm_chain
             response = await self.llm_chain.apredict(
                 extra_instructions=extra_instructions,
                 stop=["\nHuman:"],
diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/generate.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/generate.py
index 52398eabe..a69b5ed28 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/generate.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/generate.py
@@ -226,7 +226,7 @@ class GenerateChatHandler(BaseChatHandler):
     def __init__(self, log_dir: Optional[str], *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.log_dir = Path(log_dir) if log_dir else None
-        self.llm = None
+        self.llm: Optional[BaseProvider] = None
 
     def create_llm_chain(
         self, provider: Type[BaseProvider], provider_params: Dict[str, str]
@@ -248,6 +248,7 @@ async def _generate_notebook(self, prompt: str):
         # Save the user input prompt, the description property is now LLM generated.
         outline["prompt"] = prompt
 
+        assert self.llm
         if self.llm.allows_concurrency:
             # fill the outline concurrently
             await afill_outline(outline, llm=self.llm, verbose=True)
diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py
index 29e147f22..3d1c46661 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py
@@ -223,7 +223,9 @@ async def learn_dir(
         }
         splitter = ExtensionSplitter(
             splitters=splitters,
-            default_splitter=RecursiveCharacterTextSplitter(**splitter_kwargs),
+            default_splitter=RecursiveCharacterTextSplitter(
+                **splitter_kwargs  # type:ignore[arg-type]
+            ),
         )
 
         delayed = split(path, all_files, splitter=splitter)
@@ -352,7 +354,7 @@ async def aget_relevant_documents(
         self, query: str
     ) -> Coroutine[Any, Any, List[Document]]:
         if not self.index:
-            return []
+            return []  # type:ignore[return-value]
 
         await self.delete_and_relearn()
         docs = self.index.similarity_search(query)
@@ -370,12 +372,14 @@ def get_embedding_model(self):
 
 
 class Retriever(BaseRetriever):
-    learn_chat_handler: LearnChatHandler = None
+    learn_chat_handler: LearnChatHandler = None  # type:ignore[assignment]
 
-    def _get_relevant_documents(self, query: str) -> List[Document]:
+    def _get_relevant_documents(  # type:ignore[override]
+        self, query: str
+    ) -> List[Document]:
         raise NotImplementedError()
 
-    async def _aget_relevant_documents(
+    async def _aget_relevant_documents(  # type:ignore[override]
         self, query: str
     ) -> Coroutine[Any, Any, List[Document]]:
         docs = await self.learn_chat_handler.aget_relevant_documents(query)
diff --git a/packages/jupyter-ai/jupyter_ai/config_manager.py b/packages/jupyter-ai/jupyter_ai/config_manager.py
index d7674c5a2..2f38ca992 100644
--- a/packages/jupyter-ai/jupyter_ai/config_manager.py
+++ b/packages/jupyter-ai/jupyter_ai/config_manager.py
@@ -3,7 +3,7 @@
 import os
 import shutil
 import time
-from typing import List, Optional, Union
+from typing import List, Optional, Type, Union
 
 from deepmerge import always_merger as Merger
 from jsonschema import Draft202012Validator as Validator
@@ -60,7 +60,7 @@ class BlockedModelError(Exception):
     pass
 
 
-def _validate_provider_authn(config: GlobalConfig, provider: AnyProvider):
+def _validate_provider_authn(config: GlobalConfig, provider: Type[AnyProvider]):
     # TODO: handle non-env auth strategies
     if not provider.auth_strategy or provider.auth_strategy.type != "env":
         return
@@ -147,7 +147,7 @@ def _init_config_schema(self):
             os.makedirs(os.path.dirname(self.schema_path), exist_ok=True)
             shutil.copy(OUR_SCHEMA_PATH, self.schema_path)
 
-    def _init_validator(self) -> Validator:
+    def _init_validator(self) -> None:
         with open(OUR_SCHEMA_PATH, encoding="utf-8") as f:
             schema = json.loads(f.read())
             Validator.check_schema(schema)
@@ -364,7 +364,7 @@ def delete_api_key(self, key_name: str):
         config_dict["api_keys"].pop(key_name, None)
         self._write_config(GlobalConfig(**config_dict))
 
-    def update_config(self, config_update: UpdateConfigRequest):
+    def update_config(self, config_update: UpdateConfigRequest):  # type:ignore
         last_write = os.stat(self.config_path).st_mtime_ns
         if config_update.last_read and config_update.last_read < last_write:
             raise WriteConflictError(
diff --git a/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py b/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py
index 7b9b28328..c8af71d84 100644
--- a/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py
+++ b/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py
@@ -30,7 +30,7 @@ def arxiv_to_text(id: str, output_dir: str) -> str:
         output path to the downloaded TeX file
     """
 
-    import arxiv
+    import arxiv  # type:ignore[import-not-found,import-untyped]
 
     outfile = f"{id}-{datetime.now():%Y-%m-%d-%H-%M}.tex"
     download_filename = "downloaded-paper.tar.gz"
diff --git a/packages/jupyter-ai/jupyter_ai/extension.py b/packages/jupyter-ai/jupyter_ai/extension.py
index 78c4dfefa..34b484546 100644
--- a/packages/jupyter-ai/jupyter_ai/extension.py
+++ b/packages/jupyter-ai/jupyter_ai/extension.py
@@ -52,7 +52,7 @@
 
 class AiExtension(ExtensionApp):
     name = "jupyter_ai"
-    handlers = [
+    handlers = [  # type:ignore[assignment]
         (r"api/ai/api_keys/(?P<api_key_name>\w+)", ApiKeysHandler),
         (r"api/ai/config/?", GlobalConfigHandler),
         (r"api/ai/chats/?", RootChatHandler),
diff --git a/packages/jupyter-ai/jupyter_ai/handlers.py b/packages/jupyter-ai/jupyter_ai/handlers.py
index a614e3e84..7c4f16e63 100644
--- a/packages/jupyter-ai/jupyter_ai/handlers.py
+++ b/packages/jupyter-ai/jupyter_ai/handlers.py
@@ -4,7 +4,7 @@
 import uuid
 from asyncio import AbstractEventLoop
 from dataclasses import asdict
-from typing import TYPE_CHECKING, Dict, List, Optional
+from typing import TYPE_CHECKING, Dict, List, Optional, cast
 
 import tornado
 from jupyter_ai.chat_handlers import BaseChatHandler, SlashCommandRoutingType
@@ -42,14 +42,12 @@
     from jupyter_ai_magics.embedding_providers import BaseEmbeddingsProvider
     from jupyter_ai_magics.providers import BaseProvider
 
-    from .history import BoundChatHistory
+    from .history import BoundedChatHistory
 
 
 class ChatHistoryHandler(BaseAPIHandler):
     """Handler to return message history"""
 
-    _messages = []
-
     @property
     def chat_history(self) -> List[ChatMessage]:
         return self.settings["chat_history"]
@@ -103,7 +101,7 @@ def chat_history(self, new_history):
         self.settings["chat_history"] = new_history
 
     @property
-    def llm_chat_memory(self) -> "BoundChatHistory":
+    def llm_chat_memory(self) -> "BoundedChatHistory":
         return self.settings["llm_chat_memory"]
 
     @property
@@ -145,6 +143,7 @@ def get_chat_user(self) -> ChatUser:
         environment."""
         # Get a dictionary of all loaded extensions.
         # (`serverapp` is a property on all `JupyterHandler` subclasses)
+        assert self.serverapp
         extensions = self.serverapp.extension_manager.extensions
         collaborative = (
             "jupyter_collaboration" in extensions
@@ -401,7 +400,7 @@ def filter_predicate(local_model_id: str):
             if self.blocked_models:
                 return model_id not in self.blocked_models
             else:
-                return model_id in self.allowed_models
+                return model_id in cast(List, self.allowed_models)
 
         # filter out every model w/ model ID according to allow/blocklist
         for provider in providers:
@@ -514,7 +513,7 @@ def post(self):
 
 class ApiKeysHandler(BaseAPIHandler):
     @property
-    def config_manager(self) -> ConfigManager:
+    def config_manager(self) -> ConfigManager:  # type:ignore[override]
         return self.settings["jai_config_manager"]
 
     @web.authenticated
@@ -529,7 +528,7 @@ class SlashCommandsInfoHandler(BaseAPIHandler):
     """List slash commands that are currently available to the user."""
 
     @property
-    def config_manager(self) -> ConfigManager:
+    def config_manager(self) -> ConfigManager:  # type:ignore[override]
         return self.settings["jai_config_manager"]
 
     @property
diff --git a/packages/jupyter-ai/jupyter_ai/history.py b/packages/jupyter-ai/jupyter_ai/history.py
index c857b4486..0f1ba7dc0 100644
--- a/packages/jupyter-ai/jupyter_ai/history.py
+++ b/packages/jupyter-ai/jupyter_ai/history.py
@@ -25,7 +25,7 @@ class BoundedChatHistory(BaseChatMessageHistory, BaseModel):
     _all_messages: List[BaseMessage] = PrivateAttr(default_factory=list)
 
     @property
-    def messages(self) -> List[BaseMessage]:
+    def messages(self) -> List[BaseMessage]:  # type:ignore[override]
         if self.k is None:
             return self._all_messages
         return self._all_messages[-self.k * 2 :]
@@ -92,7 +92,7 @@ class WrappedBoundedChatHistory(BaseChatMessageHistory, BaseModel):
     last_human_msg: HumanChatMessage
 
     @property
-    def messages(self) -> List[BaseMessage]:
+    def messages(self) -> List[BaseMessage]:  # type:ignore[override]
         return self.history.messages
 
     def add_message(self, message: BaseMessage) -> None:
diff --git a/packages/jupyter-ai/jupyter_ai/models.py b/packages/jupyter-ai/jupyter_ai/models.py
index f9098a12a..bda4d3421 100644
--- a/packages/jupyter-ai/jupyter_ai/models.py
+++ b/packages/jupyter-ai/jupyter_ai/models.py
@@ -70,8 +70,7 @@ class ChatClient(ChatUser):
     id: str
 
 
-class AgentChatMessage(BaseModel):
-    type: Literal["agent"] = "agent"
+class BaseAgentMessage(BaseModel):
     id: str
     time: float
     body: str
@@ -89,7 +88,11 @@ class AgentChatMessage(BaseModel):
     """
 
 
-class AgentStreamMessage(AgentChatMessage):
+class AgentChatMessage(BaseAgentMessage):
+    type: Literal["agent"] = "agent"
+
+
+class AgentStreamMessage(BaseAgentMessage):
     type: Literal["agent-stream"] = "agent-stream"
     complete: bool
     # other attrs inherited from `AgentChatMessage`
@@ -138,15 +141,13 @@ class PendingMessage(BaseModel):
 
 
 class ClosePendingMessage(BaseModel):
-    type: Literal["pending"] = "close-pending"
+    type: Literal["close-pending"] = "close-pending"
     id: str
 
 
 # the type of messages being broadcast to clients
 ChatMessage = Union[
-    AgentChatMessage,
-    HumanChatMessage,
-    AgentStreamMessage,
+    AgentChatMessage, HumanChatMessage, AgentStreamMessage, AgentStreamChunkMessage
 ]
 
 
@@ -164,8 +165,7 @@ class ConnectionMessage(BaseModel):
 
 
 Message = Union[
-    AgentChatMessage,
-    HumanChatMessage,
+    ChatMessage,
     ConnectionMessage,
     ClearMessage,
     PendingMessage,
diff --git a/packages/jupyter-ai/pyproject.toml b/packages/jupyter-ai/pyproject.toml
index e8deeb133..88f5fc55f 100644
--- a/packages/jupyter-ai/pyproject.toml
+++ b/packages/jupyter-ai/pyproject.toml
@@ -32,7 +32,7 @@ dependencies = [
     "faiss-cpu<=1.8.0",          # Not distributed by official repo
     "typing_extensions>=4.5.0",
     "traitlets>=5.0",
-    "deepmerge>=1.0",
+    "deepmerge>=2.0,<3",
 ]
 
 dynamic = ["version", "description", "authors", "urls", "keywords"]
@@ -50,6 +50,8 @@ test = [
     "pytest-tornasync",
     "pytest-jupyter",
     "syrupy~=4.0.8",
+    "types-jsonschema",
+    "mypy",
 ]
 
 dev = ["jupyter_ai_magics[dev]"]
diff --git a/pyproject.toml b/pyproject.toml
index cc45dd977..d10ce4594 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,3 +43,8 @@ ignore_imports = ["jupyter_ai_magics.providers -> pydantic"]
 
 [tool.pytest.ini_options]
 addopts = "--ignore packages/jupyter-ai-module-cookiecutter"
+
+[tool.mypy]
+exclude = [
+  "tests"
+]

From 6d8a6e6c2804b694d8ce37631cbd2ca812444dd3 Mon Sep 17 00:00:00 2001
From: andrewfulton9 <andrewfulton9@gmail.com>
Date: Mon, 9 Sep 2024 22:17:29 +0000
Subject: [PATCH 5/8] adds wildcard matching to /learn

---
 .../jupyter_ai/chat_handlers/learn.py         | 22 ++++++++++++++-----
 .../jupyter_ai/document_loaders/directory.py  |  5 ++++-
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py
index 3d1c46661..6fb10a7d1 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py
@@ -1,6 +1,7 @@
 import argparse
 import json
 import os
+from glob import iglob
 from typing import Any, Coroutine, List, Optional, Tuple
 
 from dask.distributed import Client as DaskClient
@@ -180,9 +181,13 @@ async def process_message(self, message: HumanChatMessage):
         short_path = args.path[0]
         load_path = os.path.join(self.output_dir, short_path)
         if not os.path.exists(load_path):
-            response = f"Sorry, that path doesn't exist: {load_path}"
-            self.reply(response, message)
-            return
+            try:
+                # check if globbing the load path will return anything
+                next(iglob(load_path))
+            except StopIteration:
+                response = f"Sorry, that path doesn't exist: {load_path}"
+                self.reply(response, message)
+                return
 
         # delete and relearn index if embedding model was changed
         await self.delete_and_relearn()
@@ -193,11 +198,16 @@ async def process_message(self, message: HumanChatMessage):
                     load_path, args.chunk_size, args.chunk_overlap, args.all_files
                 )
             except Exception as e:
-                response = f"""Learn documents in **{load_path}** failed. {str(e)}."""
+                response = """Learn documents in **{}** failed. {}.""".format(
+                    load_path.replace("*", r"\*"),
+                    str(e),
+                )
             else:
                 self.save()
-                response = f"""🎉 I have learned documents at **{load_path}** and I am ready to answer questions about them.
-                    You can ask questions about these docs by prefixing your message with **/ask**."""
+                response = """🎉 I have learned documents at **%s** and I am ready to answer questions about them.
+                    You can ask questions about these docs by prefixing your message with **/ask**.""" % (
+                    load_path.replace("*", r"\*")
+                )
         self.reply(response, message)
 
     def _build_list_response(self):
diff --git a/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py b/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py
index c8af71d84..a7fdde4d8 100644
--- a/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py
+++ b/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py
@@ -3,6 +3,7 @@
 import os
 import tarfile
 from datetime import datetime
+from glob import iglob
 from pathlib import Path
 from typing import List
 
@@ -138,7 +139,9 @@ def collect_filepaths(path, all_files: bool):
 def split(path, all_files: bool, splitter):
     """Splits files into chunks for vector db in RAG"""
     chunks = []
-    filepaths = collect_filepaths(path, all_files)
+    filepaths = set()
+    for glob_path in iglob(path, include_hidden=all_files, recursive=True):
+        filepaths.update(collect_filepaths(glob_path, all_files))
     for filepath in filepaths:
         document = dask.delayed(path_to_doc)(filepath)
         chunk = dask.delayed(split_document)(document, splitter)

From 420e4cc9017e7c145e23026934097de289e0ca59 Mon Sep 17 00:00:00 2001
From: andrewfulton9 <andrewfulton9@gmail.com>
Date: Tue, 10 Sep 2024 15:46:14 +0000
Subject: [PATCH 6/8] Add documentation

---
 docs/source/users/index.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/docs/source/users/index.md b/docs/source/users/index.md
index de42c56ec..73385b194 100644
--- a/docs/source/users/index.md
+++ b/docs/source/users/index.md
@@ -499,6 +499,13 @@ To teach Jupyter AI about a folder full of documentation, for example, run `/lea
     alt='Screen shot of "/learn docs/" command and a response.'
     class="screenshot" />
 
+The `/learn` command also supports unix shell-style wildcard matching. This allows fine-grained file selection for learning. For example, to learn on only notebooks in a directory you can use `**/*.ipynb` and all notebooks within your base (or preferred directory if set) will be indexed, while all other file extensions will be ignored.
+
+:::{warning}
+:name: unix shell-style wildcard matching
+Certain patterns may cause `/learn` to run more slowly. For instance `/learn **` may cause directories to be walked multiple times in search of files.
+:::
+
 You can then use `/ask` to ask a question specifically about the data that you taught Jupyter AI with `/learn`.
 
 <img src="../_static/chat-ask-command.png"

From 66f0db10fac18d72446082ad599e1aa0a6632296 Mon Sep 17 00:00:00 2001
From: andrewfulton9 <andrewfulton9@gmail.com>
Date: Tue, 10 Sep 2024 15:52:29 +0000
Subject: [PATCH 7/8] improve docs

---
 docs/source/users/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/users/index.md b/docs/source/users/index.md
index 73385b194..856933ee2 100644
--- a/docs/source/users/index.md
+++ b/docs/source/users/index.md
@@ -499,7 +499,7 @@ To teach Jupyter AI about a folder full of documentation, for example, run `/lea
     alt='Screen shot of "/learn docs/" command and a response.'
     class="screenshot" />
 
-The `/learn` command also supports unix shell-style wildcard matching. This allows fine-grained file selection for learning. For example, to learn on only notebooks in a directory you can use `**/*.ipynb` and all notebooks within your base (or preferred directory if set) will be indexed, while all other file extensions will be ignored.
+The `/learn` command also supports unix shell-style wildcard matching. This allows fine-grained file selection for learning. For example, to learn on only notebooks in a directory you can use `/learn **/*.ipynb` and all notebooks within your base (or preferred directory if set) will be indexed, while all other file extensions will be ignored.
 
 :::{warning}
 :name: unix shell-style wildcard matching

From 03cca3dd9e42a46ebaf66a45cb6cd573b4474a99 Mon Sep 17 00:00:00 2001
From: Andrew Fulton <andrewfulton9@gmail.com>
Date: Tue, 10 Sep 2024 10:44:17 -0600
Subject: [PATCH 8/8] Update docs/source/users/index.md
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Michał Krassowski <5832902+krassowski@users.noreply.github.com>
---
 docs/source/users/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/users/index.md b/docs/source/users/index.md
index 856933ee2..e68049cd9 100644
--- a/docs/source/users/index.md
+++ b/docs/source/users/index.md
@@ -499,7 +499,7 @@ To teach Jupyter AI about a folder full of documentation, for example, run `/lea
     alt='Screen shot of "/learn docs/" command and a response.'
     class="screenshot" />
 
-The `/learn` command also supports unix shell-style wildcard matching. This allows fine-grained file selection for learning. For example, to learn on only notebooks in a directory you can use `/learn **/*.ipynb` and all notebooks within your base (or preferred directory if set) will be indexed, while all other file extensions will be ignored.
+The `/learn` command also supports unix shell-style wildcard matching. This allows fine-grained file selection for learning. For example, to learn on only notebooks in all directories you can use `/learn **/*.ipynb` and all notebooks within your base (or preferred directory if set) will be indexed, while all other file extensions will be ignored.
 
 :::{warning}
 :name: unix shell-style wildcard matching