From 7b4a708ad6e4cb0119102041bd86f98015bc4900 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Krassowski?= <5832902+krassowski@users.noreply.github.com>
Date: Thu, 12 Sep 2024 20:23:56 +0100
Subject: [PATCH] Run mypy on CI, fix or ignore typing issues (#987)

* Run mypy on CI

* Rename, add mypy to test deps

* Fix typing jupyter-ai codebase (mostly)

* Three more cases

* update deepmerge version specifier

---------

Co-authored-by: David L. Qiu
---
 .github/workflows/unit-tests.yml              | 23 ++++++++++++++--
 .../jupyter_ai_magics/models/completion.py    |  1 +
 .../jupyter_ai_magics/py.typed                |  0
 .../jupyter_ai/chat_handlers/ask.py           |  1 +
 .../jupyter_ai/chat_handlers/base.py          | 27 ++++++++++---------
 .../jupyter_ai/chat_handlers/default.py       |  7 ++---
 .../jupyter_ai/chat_handlers/fix.py           |  1 +
 .../jupyter_ai/chat_handlers/generate.py      |  3 ++-
 .../jupyter_ai/chat_handlers/learn.py         | 14 ++++++----
 .../jupyter-ai/jupyter_ai/config_manager.py   |  8 +++---
 .../jupyter_ai/document_loaders/directory.py  |  2 +-
 packages/jupyter-ai/jupyter_ai/extension.py   |  2 +-
 packages/jupyter-ai/jupyter_ai/handlers.py    | 15 +++++------
 packages/jupyter-ai/jupyter_ai/history.py     |  4 +--
 packages/jupyter-ai/jupyter_ai/models.py      | 18 ++++++-------
 packages/jupyter-ai/pyproject.toml            |  4 ++-
 pyproject.toml                                |  5 ++++
 17 files changed, 85 insertions(+), 50 deletions(-)
 create mode 100644 packages/jupyter-ai-magics/jupyter_ai_magics/py.typed

diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 518cd437c..05224180e 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -1,4 +1,4 @@
-name: Python Unit Tests
+name: Python Tests
 
 # suppress warning raised by https://github.com/jupyter/jupyter_core/pull/292
 env:
@@ -12,7 +12,7 @@ on:
 
 jobs:
   unit-tests:
-    name: Linux
+    name: Unit tests
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
@@ -28,3 +28,22 @@ jobs:
         run: |
           set -eux
           pytest -vv -r ap --cov jupyter_ai
+
+  typing-tests:
+    name: Typing test
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Base Setup
+        uses: jupyterlab/maintainer-tools/.github/actions/base-setup@v1
+
+      - name: Install extension dependencies and build the extension
+        run: ./scripts/install.sh
+
+      - name: Run mypy
+        run: |
+          set -eux
+          mypy --version
+          mypy packages/jupyter-ai
diff --git a/packages/jupyter-ai-magics/jupyter_ai_magics/models/completion.py b/packages/jupyter-ai-magics/jupyter_ai_magics/models/completion.py
index 147f6ceec..f2ee0cd54 100644
--- a/packages/jupyter-ai-magics/jupyter_ai_magics/models/completion.py
+++ b/packages/jupyter-ai-magics/jupyter_ai_magics/models/completion.py
@@ -43,6 +43,7 @@ class InlineCompletionItem(BaseModel):
 
 class CompletionError(BaseModel):
     type: str
+    title: str
     traceback: str
 
 
diff --git a/packages/jupyter-ai-magics/jupyter_ai_magics/py.typed b/packages/jupyter-ai-magics/jupyter_ai_magics/py.typed
new file mode 100644
index 000000000..e69de29bb
diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/ask.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/ask.py
index 5c3026685..2da303e5a 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/ask.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/ask.py
@@ -72,6 +72,7 @@ async def process_message(self, message: HumanChatMessage):
 
         try:
             with self.pending("Searching learned documents", message):
+                assert self.llm_chain
                 result = await self.llm_chain.acall({"question": query})
                 response = result["answer"]
             self.reply(response, message)
diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py
index b97015518..347bfbf83 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py
@@ -13,6 +13,7 @@
     Optional,
     Type,
     Union,
+    cast,
 )
 from uuid import uuid4
 
@@ -28,6 +29,7 @@
 )
 from jupyter_ai_magics import Persona
 from jupyter_ai_magics.providers import BaseProvider
+from langchain.chains import LLMChain
 from langchain.pydantic_v1 import BaseModel
 
 if TYPE_CHECKING:
@@ -36,8 +38,8 @@
     from langchain_core.chat_history import BaseChatMessageHistory
 
 
-def get_preferred_dir(root_dir: str, preferred_dir: str) -> Optional[str]:
-    if preferred_dir != "":
+def get_preferred_dir(root_dir: str, preferred_dir: Optional[str]) -> Optional[str]:
+    if preferred_dir is not None and preferred_dir != "":
         preferred_dir = os.path.expanduser(preferred_dir)
         if not preferred_dir.startswith(root_dir):
             preferred_dir = os.path.join(root_dir, preferred_dir)
@@ -47,7 +49,7 @@ def get_preferred_dir(root_dir: str, preferred_dir: str) -> Optional[str]:
 
 # Chat handler type, with specific attributes for each
 class HandlerRoutingType(BaseModel):
-    routing_method: ClassVar[Union[Literal["slash_command"]]] = ...
+    routing_method: ClassVar[Union[Literal["slash_command"]]]
     """The routing method that sends commands to this handler."""
 
 
@@ -83,17 +85,17 @@ class BaseChatHandler:
     multiple chat handler classes."""
 
     # Class attributes
-    id: ClassVar[str] = ...
+    id: ClassVar[str]
     """ID for this chat handler; should be unique"""
 
-    name: ClassVar[str] = ...
+    name: ClassVar[str]
     """User-facing name of this handler"""
 
-    help: ClassVar[str] = ...
+    help: ClassVar[str]
     """What this chat handler does, which third-party models it contacts,
     the data it returns to the user, and so on, for display in the UI."""
 
-    routing_type: ClassVar[HandlerRoutingType] = ...
+    routing_type: ClassVar[HandlerRoutingType]
 
     uses_llm: ClassVar[bool] = True
     """Class attribute specifying whether this chat handler uses the LLM
@@ -153,9 +155,9 @@ def __init__(
         self.help_message_template = help_message_template
         self.chat_handlers = chat_handlers
 
-        self.llm = None
-        self.llm_params = None
-        self.llm_chain = None
+        self.llm: Optional[BaseProvider] = None
+        self.llm_params: Optional[dict] = None
+        self.llm_chain: Optional[LLMChain] = None
 
     async def on_message(self, message: HumanChatMessage):
         """
@@ -168,9 +170,8 @@ async def on_message(self, message: HumanChatMessage):
 
         # ensure the current slash command is supported
         if self.routing_type.routing_method == "slash_command":
-            slash_command = (
-                "/" + self.routing_type.slash_id if self.routing_type.slash_id else ""
-            )
+            routing_type = cast(SlashCommandRoutingType, self.routing_type)
+            slash_command = "/" + routing_type.slash_id if routing_type.slash_id else ""
             if slash_command in lm_provider_klass.unsupported_slash_commands:
                 self.reply(
                     "Sorry, the selected language model does not support this slash command."
diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/default.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/default.py
index 3e8f194b3..2bf88bafe 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/default.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/default.py
@@ -41,10 +41,10 @@ def create_llm_chain(
         prompt_template = llm.get_chat_prompt_template()
         self.llm = llm
 
-        runnable = prompt_template | llm
+        runnable = prompt_template | llm  # type:ignore
         if not llm.manages_history:
             runnable = RunnableWithMessageHistory(
-                runnable=runnable,
+                runnable=runnable,  # type:ignore[arg-type]
                 get_session_history=self.get_llm_chat_memory,
                 input_messages_key="input",
                 history_messages_key="history",
@@ -106,6 +106,7 @@ async def process_message(self, message: HumanChatMessage):
         # stream response in chunks. this works even if a provider does not
         # implement streaming, as `astream()` defaults to yielding `_call()`
         # when `_stream()` is not implemented on the LLM class.
+        assert self.llm_chain
         async for chunk in self.llm_chain.astream(
             {"input": message.body},
             config={"configurable": {"last_human_msg": message}},
@@ -117,7 +118,7 @@ async def process_message(self, message: HumanChatMessage):
                     stream_id = self._start_stream(human_msg=message)
                     received_first_chunk = True
 
-                if isinstance(chunk, AIMessageChunk):
+                if isinstance(chunk, AIMessageChunk) and isinstance(chunk.content, str):
                     self._send_stream_chunk(stream_id, chunk.content)
                 elif isinstance(chunk, str):
                     self._send_stream_chunk(stream_id, chunk)
diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/fix.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/fix.py
index d6ecc6d81..1056e592c 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/fix.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/fix.py
@@ -93,6 +93,7 @@ async def process_message(self, message: HumanChatMessage):
         self.get_llm_chain()
 
         with self.pending("Analyzing error", message):
+            assert self.llm_chain
             response = await self.llm_chain.apredict(
                 extra_instructions=extra_instructions,
                 stop=["\nHuman:"],
diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/generate.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/generate.py
index 52398eabe..a69b5ed28 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/generate.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/generate.py
@@ -226,7 +226,7 @@ class GenerateChatHandler(BaseChatHandler):
     def __init__(self, log_dir: Optional[str], *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.log_dir = Path(log_dir) if log_dir else None
-        self.llm = None
+        self.llm: Optional[BaseProvider] = None
 
     def create_llm_chain(
         self, provider: Type[BaseProvider], provider_params: Dict[str, str]
@@ -248,6 +248,7 @@ async def _generate_notebook(self, prompt: str):
         # Save the user input prompt, the description property is now LLM generated.
outline["prompt"] = prompt + assert self.llm if self.llm.allows_concurrency: # fill the outline concurrently await afill_outline(outline, llm=self.llm, verbose=True) diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py index 29e147f22..3d1c46661 100644 --- a/packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py +++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py @@ -223,7 +223,9 @@ async def learn_dir( } splitter = ExtensionSplitter( splitters=splitters, - default_splitter=RecursiveCharacterTextSplitter(**splitter_kwargs), + default_splitter=RecursiveCharacterTextSplitter( + **splitter_kwargs # type:ignore[arg-type] + ), ) delayed = split(path, all_files, splitter=splitter) @@ -352,7 +354,7 @@ async def aget_relevant_documents( self, query: str ) -> Coroutine[Any, Any, List[Document]]: if not self.index: - return [] + return [] # type:ignore[return-value] await self.delete_and_relearn() docs = self.index.similarity_search(query) @@ -370,12 +372,14 @@ def get_embedding_model(self): class Retriever(BaseRetriever): - learn_chat_handler: LearnChatHandler = None + learn_chat_handler: LearnChatHandler = None # type:ignore[assignment] - def _get_relevant_documents(self, query: str) -> List[Document]: + def _get_relevant_documents( # type:ignore[override] + self, query: str + ) -> List[Document]: raise NotImplementedError() - async def _aget_relevant_documents( + async def _aget_relevant_documents( # type:ignore[override] self, query: str ) -> Coroutine[Any, Any, List[Document]]: docs = await self.learn_chat_handler.aget_relevant_documents(query) diff --git a/packages/jupyter-ai/jupyter_ai/config_manager.py b/packages/jupyter-ai/jupyter_ai/config_manager.py index d7674c5a2..2f38ca992 100644 --- a/packages/jupyter-ai/jupyter_ai/config_manager.py +++ b/packages/jupyter-ai/jupyter_ai/config_manager.py @@ -3,7 +3,7 @@ import os import shutil import time -from typing import List, Optional, Union +from typing import List, Optional, Type, Union from deepmerge import always_merger as Merger from jsonschema import Draft202012Validator as Validator @@ -60,7 +60,7 @@ class BlockedModelError(Exception): pass -def _validate_provider_authn(config: GlobalConfig, provider: AnyProvider): +def _validate_provider_authn(config: GlobalConfig, provider: Type[AnyProvider]): # TODO: handle non-env auth strategies if not provider.auth_strategy or provider.auth_strategy.type != "env": return @@ -147,7 +147,7 @@ def _init_config_schema(self): os.makedirs(os.path.dirname(self.schema_path), exist_ok=True) shutil.copy(OUR_SCHEMA_PATH, self.schema_path) - def _init_validator(self) -> Validator: + def _init_validator(self) -> None: with open(OUR_SCHEMA_PATH, encoding="utf-8") as f: schema = json.loads(f.read()) Validator.check_schema(schema) @@ -364,7 +364,7 @@ def delete_api_key(self, key_name: str): config_dict["api_keys"].pop(key_name, None) self._write_config(GlobalConfig(**config_dict)) - def update_config(self, config_update: UpdateConfigRequest): + def update_config(self, config_update: UpdateConfigRequest): # type:ignore last_write = os.stat(self.config_path).st_mtime_ns if config_update.last_read and config_update.last_read < last_write: raise WriteConflictError( diff --git a/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py b/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py index 7b9b28328..c8af71d84 100644 --- a/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py +++ 
@@ -30,7 +30,7 @@ def arxiv_to_text(id: str, output_dir: str) -> str:
         output path to the downloaded TeX file
     """
 
-    import arxiv
+    import arxiv  # type:ignore[import-not-found,import-untyped]
 
     outfile = f"{id}-{datetime.now():%Y-%m-%d-%H-%M}.tex"
     download_filename = "downloaded-paper.tar.gz"
diff --git a/packages/jupyter-ai/jupyter_ai/extension.py b/packages/jupyter-ai/jupyter_ai/extension.py
index 78c4dfefa..34b484546 100644
--- a/packages/jupyter-ai/jupyter_ai/extension.py
+++ b/packages/jupyter-ai/jupyter_ai/extension.py
@@ -52,7 +52,7 @@ class AiExtension(ExtensionApp):
 
     name = "jupyter_ai"
 
-    handlers = [
+    handlers = [  # type:ignore[assignment]
        (r"api/ai/api_keys/(?P<api_key_name>\w+)", ApiKeysHandler),
        (r"api/ai/config/?", GlobalConfigHandler),
        (r"api/ai/chats/?", RootChatHandler),
diff --git a/packages/jupyter-ai/jupyter_ai/handlers.py b/packages/jupyter-ai/jupyter_ai/handlers.py
index a614e3e84..7c4f16e63 100644
--- a/packages/jupyter-ai/jupyter_ai/handlers.py
+++ b/packages/jupyter-ai/jupyter_ai/handlers.py
@@ -4,7 +4,7 @@
 import uuid
 from asyncio import AbstractEventLoop
 from dataclasses import asdict
-from typing import TYPE_CHECKING, Dict, List, Optional
+from typing import TYPE_CHECKING, Dict, List, Optional, cast
 
 import tornado
 from jupyter_ai.chat_handlers import BaseChatHandler, SlashCommandRoutingType
@@ -42,14 +42,12 @@
     from jupyter_ai_magics.embedding_providers import BaseEmbeddingsProvider
     from jupyter_ai_magics.providers import BaseProvider
 
-    from .history import BoundChatHistory
+    from .history import BoundedChatHistory
 
 
 class ChatHistoryHandler(BaseAPIHandler):
     """Handler to return message history"""
 
-    _messages = []
-
     @property
     def chat_history(self) -> List[ChatMessage]:
         return self.settings["chat_history"]
@@ -103,7 +101,7 @@ def chat_history(self, new_history):
         self.settings["chat_history"] = new_history
 
     @property
-    def llm_chat_memory(self) -> "BoundChatHistory":
+    def llm_chat_memory(self) -> "BoundedChatHistory":
         return self.settings["llm_chat_memory"]
 
     @property
@@ -145,6 +143,7 @@ def get_chat_user(self) -> ChatUser:
         environment."""
         # Get a dictionary of all loaded extensions.
         # (`serverapp` is a property on all `JupyterHandler` subclasses)
+        assert self.serverapp
         extensions = self.serverapp.extension_manager.extensions
         collaborative = (
             "jupyter_collaboration" in extensions
@@ -401,7 +400,7 @@ def filter_predicate(local_model_id: str):
             if self.blocked_models:
                 return model_id not in self.blocked_models
             else:
-                return model_id in self.allowed_models
+                return model_id in cast(List, self.allowed_models)
 
         # filter out every model w/ model ID according to allow/blocklist
         for provider in providers:
@@ -514,7 +513,7 @@ def post(self):
 
 class ApiKeysHandler(BaseAPIHandler):
     @property
-    def config_manager(self) -> ConfigManager:
+    def config_manager(self) -> ConfigManager:  # type:ignore[override]
         return self.settings["jai_config_manager"]
 
     @web.authenticated
@@ -529,7 +528,7 @@ class SlashCommandsInfoHandler(BaseAPIHandler):
     """List slash commands that are currently available to the user."""
 
     @property
-    def config_manager(self) -> ConfigManager:
+    def config_manager(self) -> ConfigManager:  # type:ignore[override]
         return self.settings["jai_config_manager"]
 
     @property
diff --git a/packages/jupyter-ai/jupyter_ai/history.py b/packages/jupyter-ai/jupyter_ai/history.py
index c857b4486..0f1ba7dc0 100644
--- a/packages/jupyter-ai/jupyter_ai/history.py
+++ b/packages/jupyter-ai/jupyter_ai/history.py
@@ -25,7 +25,7 @@ class BoundedChatHistory(BaseChatMessageHistory, BaseModel):
     _all_messages: List[BaseMessage] = PrivateAttr(default_factory=list)
 
     @property
-    def messages(self) -> List[BaseMessage]:
+    def messages(self) -> List[BaseMessage]:  # type:ignore[override]
         if self.k is None:
             return self._all_messages
         return self._all_messages[-self.k * 2 :]
@@ -92,7 +92,7 @@ class WrappedBoundedChatHistory(BaseChatMessageHistory, BaseModel):
     last_human_msg: HumanChatMessage
 
     @property
-    def messages(self) -> List[BaseMessage]:
+    def messages(self) -> List[BaseMessage]:  # type:ignore[override]
         return self.history.messages
 
     def add_message(self, message: BaseMessage) -> None:
diff --git a/packages/jupyter-ai/jupyter_ai/models.py b/packages/jupyter-ai/jupyter_ai/models.py
index f9098a12a..bda4d3421 100644
--- a/packages/jupyter-ai/jupyter_ai/models.py
+++ b/packages/jupyter-ai/jupyter_ai/models.py
@@ -70,8 +70,7 @@ class ChatClient(ChatUser):
     id: str
 
 
-class AgentChatMessage(BaseModel):
-    type: Literal["agent"] = "agent"
+class BaseAgentMessage(BaseModel):
     id: str
     time: float
     body: str
@@ -89,7 +88,11 @@ class AgentChatMessage(BaseModel):
     """
 
 
-class AgentStreamMessage(AgentChatMessage):
+class AgentChatMessage(BaseAgentMessage):
+    type: Literal["agent"] = "agent"
+
+
+class AgentStreamMessage(BaseAgentMessage):
     type: Literal["agent-stream"] = "agent-stream"
     complete: bool
     # other attrs inherited from `AgentChatMessage`
@@ -138,15 +141,13 @@ class PendingMessage(BaseModel):
 
 
 class ClosePendingMessage(BaseModel):
-    type: Literal["pending"] = "close-pending"
+    type: Literal["close-pending"] = "close-pending"
     id: str
 
 
 # the type of messages being broadcast to clients
 ChatMessage = Union[
-    AgentChatMessage,
-    HumanChatMessage,
-    AgentStreamMessage,
+    AgentChatMessage, HumanChatMessage, AgentStreamMessage, AgentStreamChunkMessage
 ]
 
 
@@ -164,8 +165,7 @@ class ConnectionMessage(BaseModel):
 
 
 Message = Union[
-    AgentChatMessage,
-    HumanChatMessage,
+    ChatMessage,
     ConnectionMessage,
     ClearMessage,
     PendingMessage,
diff --git a/packages/jupyter-ai/pyproject.toml b/packages/jupyter-ai/pyproject.toml
index e8deeb133..88f5fc55f 100644
--- a/packages/jupyter-ai/pyproject.toml
+++ b/packages/jupyter-ai/pyproject.toml
@@ -32,7 +32,7 @@ dependencies = [
     "faiss-cpu<=1.8.0", # Not distributed by official repo
     "typing_extensions>=4.5.0",
     "traitlets>=5.0",
-    "deepmerge>=1.0",
+    "deepmerge>=2.0,<3",
 ]
 
 dynamic = ["version", "description", "authors", "urls", "keywords"]
@@ -50,6 +50,8 @@ test = [
     "pytest-tornasync",
     "pytest-jupyter",
     "syrupy~=4.0.8",
+    "types-jsonschema",
+    "mypy",
 ]
 
 dev = ["jupyter_ai_magics[dev]"]
diff --git a/pyproject.toml b/pyproject.toml
index cc45dd977..d10ce4594 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,3 +43,8 @@ ignore_imports = ["jupyter_ai_magics.providers -> pydantic"]
 
 [tool.pytest.ini_options]
 addopts = "--ignore packages/jupyter-ai-module-cookiecutter"
+
+[tool.mypy]
+exclude = [
+    "tests"
+]