From 8c4380d3a58964b5c4ce09af62854089ee66dc44 Mon Sep 17 00:00:00 2001
From: michael
Date: Thu, 12 Sep 2024 21:52:23 +0800
Subject: [PATCH] context provider

---
 .../jupyter_ai_magics/providers.py            |  29 +++-
 .../jupyter_ai/chat_handlers/base.py          |   7 +
 .../jupyter_ai/chat_handlers/default.py       |  26 ++-
 .../jupyter_ai/context_providers/__init__.py  |   3 +
 .../jupyter_ai/context_providers/_examples.py | 133 +++++++++++++++
 .../jupyter_ai/context_providers/base.py      | 157 ++++++++++++++++++
 .../jupyter_ai/context_providers/file.py      | 116 +++++++++++++
 .../jupyter_ai/context_providers/learned.py   |  50 ++++++
 packages/jupyter-ai/jupyter_ai/extension.py   |  60 +++++++
 packages/jupyter-ai/jupyter_ai/handlers.py    | 101 +++++++++++
 packages/jupyter-ai/jupyter_ai/models.py      |  34 ++++
 .../tests/test_context_providers.py           |  63 +++++++
 .../jupyter_ai/tests/test_handlers.py         |   1 +
 .../jupyter-ai/src/components/chat-input.tsx  | 106 +++++++++---
 packages/jupyter-ai/src/handler.ts            |  34 ++++
 15 files changed, 889 insertions(+), 31 deletions(-)
 create mode 100644 packages/jupyter-ai/jupyter_ai/context_providers/__init__.py
 create mode 100644 packages/jupyter-ai/jupyter_ai/context_providers/_examples.py
 create mode 100644 packages/jupyter-ai/jupyter_ai/context_providers/base.py
 create mode 100644 packages/jupyter-ai/jupyter_ai/context_providers/file.py
 create mode 100644 packages/jupyter-ai/jupyter_ai/context_providers/learned.py
 create mode 100644 packages/jupyter-ai/jupyter_ai/tests/test_context_providers.py

diff --git a/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py b/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py
index 551db8bbc..3b6c672a1 100644
--- a/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py
+++ b/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py
@@ -67,11 +67,26 @@ The following is a friendly conversation between you and a human.
 """.strip()
 
-CHAT_DEFAULT_TEMPLATE = """Current conversation:
-{history}
-Human: {input}
+CHAT_DEFAULT_TEMPLATE = """
+{% if context %}
+Context:
+{{context}}
+
+{% endif %}
+Current conversation:
+{{history}}
+Human: {{input}}
 AI:"""
 
+HUMAN_MESSAGE_TEMPLATE = """
+{% if context %}
+<context>
+{{context}}
+</context>
+
+{% endif %}
+{{input}}
+"""
 
 COMPLETION_SYSTEM_PROMPT = """
 You are an application built to provide helpful code completion suggestions.
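Reviewer note: both templates above are now Jinja2-format (see the `template_format="jinja2"` arguments in the next hunk), so the `{% if context %}` guard drops the context block entirely when no provider contributes anything. A minimal sketch of that behavior, not part of the patch, assuming `langchain-core` and `jinja2` are installed:

```python
# Sketch: how the jinja2-format chat template renders with/without context.
from langchain_core.prompts import PromptTemplate

CHAT_DEFAULT_TEMPLATE = """{% if context %}Context:
{{context}}

{% endif %}Current conversation:
{{history}}
Human: {{input}}
AI:"""

template = PromptTemplate(
    input_variables=["history", "input", "context"],
    template=CHAT_DEFAULT_TEMPLATE,
    template_format="jinja2",
)

# empty context: the {% if %} block renders nothing
print(template.format(history="", input="Hi", context=""))
# non-empty context: the block is prepended to the conversation
print(template.format(history="", input="Hi", context="File: a.py\n..."))
```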
@@ -400,17 +415,21 @@ def get_chat_prompt_template(self) -> PromptTemplate:
                     CHAT_SYSTEM_PROMPT
                 ).format(provider_name=name, local_model_id=self.model_id),
                 MessagesPlaceholder(variable_name="history"),
-                HumanMessagePromptTemplate.from_template("{input}"),
+                HumanMessagePromptTemplate.from_template(
+                    HUMAN_MESSAGE_TEMPLATE,
+                    template_format="jinja2",
+                ),
             ]
         )
     else:
         return PromptTemplate(
-            input_variables=["history", "input"],
+            input_variables=["history", "input", "context"],
             template=CHAT_SYSTEM_PROMPT.format(
                 provider_name=name, local_model_id=self.model_id
             )
             + "\n\n"
             + CHAT_DEFAULT_TEMPLATE,
+            template_format="jinja2",
         )
 
     def get_completion_prompt_template(self) -> PromptTemplate:
diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py
index 347bfbf83..2e5786f57 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py
@@ -35,6 +35,7 @@
 if TYPE_CHECKING:
     from jupyter_ai.handlers import RootChatHandler
     from jupyter_ai.history import BoundedChatHistory
+    from jupyter_ai.context_providers import BaseContextProvider
     from langchain_core.chat_history import BaseChatMessageHistory
 
 
@@ -121,6 +122,10 @@ class BaseChatHandler:
     chat handlers, which is necessary for some use-cases like printing the
     help message."""
 
+    context_providers: Dict[str, "BaseContextProvider"]
+    """Dictionary of context provider instances. Allows chat handlers to
+    reference context providers, which can be used to provide context
+    to the LLM."""
+
     def __init__(
         self,
         log: Logger,
@@ -134,6 +139,7 @@ def __init__(
         dask_client_future: Awaitable[DaskClient],
         help_message_template: str,
         chat_handlers: Dict[str, "BaseChatHandler"],
+        context_providers: Dict[str, "BaseContextProvider"],
     ):
         self.log = log
         self.config_manager = config_manager
@@ -154,6 +160,7 @@ def __init__(
         self.dask_client_future = dask_client_future
         self.help_message_template = help_message_template
         self.chat_handlers = chat_handlers
+        self.context_providers = context_providers
 
         self.llm: Optional[BaseProvider] = None
         self.llm_params: Optional[dict] = None
diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/default.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/default.py
index 2bf88bafe..cc905446b 100644
--- a/packages/jupyter-ai/jupyter_ai/chat_handlers/default.py
+++ b/packages/jupyter-ai/jupyter_ai/chat_handlers/default.py
@@ -1,3 +1,4 @@
+import asyncio
 import time
 from typing import Dict, Type
 from uuid import uuid4
@@ -13,6 +14,7 @@
 from langchain_core.runnables.history import RunnableWithMessageHistory
 
 from ..models import HumanChatMessage
+from ..context_providers import ContextProviderException
 from .base import BaseChatHandler, SlashCommandRoutingType
 
 
@@ -27,6 +29,7 @@ class DefaultChatHandler(BaseChatHandler):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        self.prompt_template = None
 
     def create_llm_chain(
         self, provider: Type[BaseProvider], provider_params: Dict[str, str]
@@ -40,6 +43,7 @@ def create_llm_chain(
         prompt_template = llm.get_chat_prompt_template()
         self.llm = llm
+        self.prompt_template = prompt_template
 
         runnable = prompt_template | llm  # type:ignore
         if not llm.manages_history:
@@ -101,6 +105,16 @@ async def process_message(self, message: HumanChatMessage):
         self.get_llm_chain()
         received_first_chunk = False
 
+        inputs = {"input": message.body}
+        if "context" in self.prompt_template.input_variables:
+            # include context from context providers
+            try:
+                context_prompt = await self.make_context_prompt(message)
+            except ContextProviderException as e:
+                self.reply(str(e), message)
+                return
+            inputs["context"] = context_prompt
+
         # start with a pending message
         with self.pending("Generating response", message) as pending_message:
             # stream response in chunks. this works even if a provider does not
             # implement streaming, as `astream()` defaults to yielding `_call()`
             # when `_stream()` is not implemented on the LLM class.
             assert self.llm_chain
             async for chunk in self.llm_chain.astream(
-                {"input": message.body},
+                inputs,
                 config={"configurable": {"last_human_msg": message}},
             ):
                 if not received_first_chunk:
@@ -128,3 +142,13 @@ async def process_message(self, message: HumanChatMessage):
 
         # complete stream after all chunks have been streamed
         self._send_stream_chunk(stream_id, "", complete=True)
+
+    async def make_context_prompt(self, human_msg: HumanChatMessage) -> str:
+        return "\n\n".join(
+            await asyncio.gather(
+                *[
+                    provider.make_context_prompt(human_msg)
+                    for provider in self.context_providers.values()
+                ]
+            )
+        )
diff --git a/packages/jupyter-ai/jupyter_ai/context_providers/__init__.py b/packages/jupyter-ai/jupyter_ai/context_providers/__init__.py
new file mode 100644
index 000000000..880faf28b
--- /dev/null
+++ b/packages/jupyter-ai/jupyter_ai/context_providers/__init__.py
@@ -0,0 +1,3 @@
+from .base import BaseContextProvider, ContextProviderException
+from .file import FileContextProvider
+from .learned import LearnedContextProvider
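Reviewer note: `DefaultChatHandler.make_context_prompt` (added above) fans out to every registered provider concurrently and joins the results. A standalone sketch of that fan-out pattern; the `Toy*` classes are invented here for illustration and are not part of the patch (real providers receive a `HumanChatMessage`, not a string):

```python
# Sketch of the asyncio.gather fan-out used by make_context_prompt.
import asyncio


class ToyFileProvider:
    async def make_context_prompt(self, prompt: str) -> str:
        return "File: app.py\n..." if "@file:" in prompt else ""


class ToyLearnedProvider:
    async def make_context_prompt(self, prompt: str) -> str:
        return "Snippet from file: notes.md\n..." if "@learned" in prompt else ""


async def make_context_prompt(providers, prompt: str) -> str:
    # all providers run concurrently; providers that do not apply return ""
    return "\n\n".join(
        await asyncio.gather(*(p.make_context_prompt(prompt) for p in providers))
    )


print(asyncio.run(make_context_prompt(
    [ToyFileProvider(), ToyLearnedProvider()], "@file:app.py what does this do?"
)))
# empty results still contribute a join separator, as in the patch
```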
diff --git a/packages/jupyter-ai/jupyter_ai/context_providers/_examples.py b/packages/jupyter-ai/jupyter_ai/context_providers/_examples.py
new file mode 100644
index 000000000..6772d7a3e
--- /dev/null
+++ b/packages/jupyter-ai/jupyter_ai/context_providers/_examples.py
@@ -0,0 +1,133 @@
+# This file is for illustrative purposes
+# It is to be deleted before merging
+from jupyter_ai.models import HumanChatMessage
+from langchain_community.retrievers import ArxivRetriever, WikipediaRetriever
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+
+from .base import BaseContextProvider
+
+
+# Examples of the ease of implementing retriever based context providers
+ARXIV_TEMPLATE = """
+Title: {title}
+Publish Date: {publish_date}
+'''
+{content}
+'''
+""".strip()
+
+
+class ArxivContextProvider(BaseContextProvider):
+    id = "arxiv"
+    description = "Include papers from arXiv"
+    remove_from_prompt = True
+    header = "Following are snippets of research papers:"
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.retriever = ArxivRetriever()
+
+    async def make_context_prompt(self, message: HumanChatMessage) -> str:
+        if not self._find_instances(message.prompt):
+            return ""
+        query = self._clean_prompt(message.body)
+        docs = await self.retriever.ainvoke(query)
+        context = "\n\n".join(
+            [
+                ARXIV_TEMPLATE.format(
+                    content=d.page_content,
+                    title=d.metadata["Title"],
+                    publish_date=d.metadata["Published"],
+                )
+                for d in docs
+            ]
+        )
+        return self.header + "\n" + context
+
+
+# Another retriever based context provider with a rewrite step using the LLM
+WIKI_TEMPLATE = """
+Title: {title}
+'''
+{content}
+'''
+""".strip()
+
+REWRITE_TEMPLATE = """Provide a better search query for \
+web search engine to answer the given question, end \
+the queries with '**'. Question: \
+{x} Answer:"""
+
+
+class WikiContextProvider(BaseContextProvider):
+    id = "wiki"
+    description = "Include knowledge from Wikipedia"
+    remove_from_prompt = True
+    header = "Following is information from Wikipedia:"
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.retriever = WikipediaRetriever()
+
+    async def make_context_prompt(self, message: HumanChatMessage) -> str:
+        if not self._find_instances(message.prompt):
+            return ""
+        prompt = self._clean_prompt(message.body)
+        search_query = await self._rewrite_prompt(prompt)
+        docs = await self.retriever.ainvoke(search_query)
+        context = "\n\n".join(
+            [
+                WIKI_TEMPLATE.format(
+                    content=d.page_content,
+                    title=d.metadata["title"],
+                )
+                for d in docs
+            ]
+        )
+        return self.header + "\n" + context
+
+    async def _rewrite_prompt(self, prompt: str) -> str:
+        return await self.get_llm_chain().ainvoke(prompt)
+
+    def get_llm_chain(self):
+        # from https://github.com/langchain-ai/langchain/blob/master/cookbook/rewrite.ipynb
+        llm = self.get_llm()
+        rewrite_prompt = ChatPromptTemplate.from_template(REWRITE_TEMPLATE)
+
+        def _parse(text):
+            return text.strip('"').strip("**")
+
+        return rewrite_prompt | llm | StrOutputParser() | _parse
+
+
+# Partial example of a non-command context provider for errors.
+# Assuming there is an option in the UI to add cell errors to messages,
+# default chat will automatically invoke this context provider to add
+# solutions retrieved from a custom error database or a stackoverflow / google
+# retriever pipeline to find solutions for errors.
+class ErrorContextProvider(BaseContextProvider):
+    id = "error"
+    description = "Include custom error context"
+    remove_from_prompt = True
+    header = "Following are potential solutions for the error:"
+    is_command = False  # will not show up in autocomplete
+
+    async def make_context_prompt(self, message: HumanChatMessage) -> str:
+        # will run for every message with a cell error since it does not
+        # use _find_instances to check for the presence of the command in
+        # the message.
+        if not (message.selection and message.selection.type == "cell-with-error"):
+            return ""
+        docs = await self.solution_retriever.ainvoke(message.selection)
+        if not docs:
+            return ""
+        context = "\n\n".join([d.page_content for d in docs])
+        return self.header + "\n" + context
+
+    @property
+    def solution_retriever(self):
+        # retriever that takes an error and returns solutions from a database
+        # of error messages.
+        raise NotImplementedError("Error retriever not implemented")
diff --git a/packages/jupyter-ai/jupyter_ai/context_providers/base.py b/packages/jupyter-ai/jupyter_ai/context_providers/base.py
new file mode 100644
index 000000000..83f970e55
--- /dev/null
+++ b/packages/jupyter-ai/jupyter_ai/context_providers/base.py
@@ -0,0 +1,157 @@
+import abc
+import os
+import re
+from typing import (
+    TYPE_CHECKING,
+    Awaitable,
+    ClassVar,
+    Dict,
+    List,
+    Optional,
+)
+
+from dask.distributed import Client as DaskClient
+from jupyter_ai.chat_handlers.base import get_preferred_dir
+from jupyter_ai.config_manager import ConfigManager, Logger
+from jupyter_ai.models import ChatMessage, HumanChatMessage, ListOptionsEntry
+
+if TYPE_CHECKING:
+    from jupyter_ai.history import BoundedChatHistory
+    from jupyter_ai.chat_handlers import BaseChatHandler
+
+
+class BaseContextProvider(abc.ABC):
+    id: ClassVar[str]
+    description: ClassVar[str]
+    requires_arg: ClassVar[bool] = False
+    is_command: ClassVar[bool] = (
+        True  # whether the context provider can be invoked from chat
+    )
+    remove_from_prompt: ClassVar[bool] = (
+        False  # whether the command should be removed from the prompt
+    )
+
+    def __init__(
+        self,
+        *,
+        log: Logger,
+        config_manager: ConfigManager,
+        model_parameters: Dict[str, Dict],
+        chat_history: List[ChatMessage],
+        llm_chat_memory: "BoundedChatHistory",
+        root_dir: str,
+        preferred_dir: Optional[str],
+        dask_client_future: Awaitable[DaskClient],
+        chat_handlers: Dict[str, "BaseChatHandler"],
+        context_providers: Dict[str, "BaseContextProvider"],
+    ):
+        preferred_dir = preferred_dir or ""
+        self.log = log
+        self.config_manager = config_manager
+        self.model_parameters = model_parameters
+        self._chat_history = chat_history
+        self.llm_chat_memory = llm_chat_memory
+        self.root_dir = os.path.abspath(os.path.expanduser(root_dir))
+        self.preferred_dir = get_preferred_dir(self.root_dir, preferred_dir)
+        self.dask_client_future = dask_client_future
+        self.chat_handlers = chat_handlers
+        self.context_providers = context_providers
+
+        self.llm = None
+
+    @property
+    def pattern(self) -> str:
+        return (
+            rf"(?<![^\s.])@{self.id}:[^\s]+"
+            if self.requires_arg
+            else rf"(?<![^\s.])@{self.id}(?![^\s.])"
+        )
+
+    @abc.abstractmethod
+    async def make_context_prompt(self, message: HumanChatMessage) -> str:
+        """Returns a context prompt for all instances of the context provider
+        command.
+        """
+        pass
+
+    def get_arg_options(self, arg_prefix: str) -> List[ListOptionsEntry]:
+        """Returns a list of autocomplete options for arguments to the command
+        based on the prefix.
+        Only triggered if ':' is present after the command id (e.g. '@file:').
+        """
+        return []
+
+    def replace_prompt(self, prompt: str) -> str:
+        """Cleans up instances of the command from the prompt before
+        sending it to the LLM.
+        """
+        if self.remove_from_prompt:
+            return re.sub(self.pattern, "", prompt)
+        return prompt
+
+    def _find_instances(self, text: str) -> List[str]:
+        # finds instances of the context provider command in the text
+        matches = re.finditer(self.pattern, text)
+        results = []
+        for match in matches:
+            start, end = match.span()
+            before = text[:start]
+            after = text[end:]
+            # Check if the match is within backticks
+            if before.count("`") % 2 == 0 and after.count("`") % 2 == 0:
+                results.append(match.group())
+        return results
+
+    def _clean_prompt(self, text: str) -> str:
+        # useful for cleaning up the prompt before sending it to a retriever
+        for provider in self.context_providers.values():
+            text = provider.replace_prompt(text)
+        return text
+
+    @property
+    def base_dir(self) -> str:
+        # same as BaseChatHandler.output_dir
+        if self.preferred_dir and os.path.exists(self.preferred_dir):
+            return self.preferred_dir
+        else:
+            return self.root_dir
+
+    def get_llm(self):
+        lm_provider = self.config_manager.lm_provider
+        lm_provider_params = self.config_manager.lm_provider_params
+
+        curr_lm_id = (
+            f'{self.llm.id}:{lm_provider_params["model_id"]}' if self.llm else None
+        )
+        next_lm_id = (
+            f'{lm_provider.id}:{lm_provider_params["model_id"]}'
+            if lm_provider
+            else None
+        )
+
+        if not lm_provider or not lm_provider_params:
+            return None
+
+        if curr_lm_id != next_lm_id:
+            model_parameters = self.model_parameters.get(
+                f"{lm_provider.id}:{lm_provider_params['model_id']}", {}
+            )
+            unified_parameters = {
+                "verbose": True,
+                **lm_provider_params,
+                **model_parameters,
+            }
+            llm = lm_provider(**unified_parameters)
+            self.llm = llm
+        return self.llm
+
+
+class ContextProviderException(Exception):
+    # Used to generate a response when a context provider fails
+    pass
path += "/" + return ListOptionsEntry.from_arg( + type="@", + id=self.id, + description="Directory" if is_dir else "File", + arg=path, + is_complete=not is_dir, + ) + + async def make_context_prompt(self, message: HumanChatMessage) -> str: + instances = set(self._find_instances(message.prompt)) + if not instances: + return "" + context = "\n\n".join( + [context for i in instances if (context := self._make_instance_context(i))] + ) + if not context: + return "" + return self.header + "\n" + context + + def _make_instance_context(self, instance: str) -> str: + filepath = instance.partition(":")[2] + if not os.path.isabs(filepath): + filepath = os.path.join(self.base_dir, filepath) + + if not os.path.exists(filepath): + raise ContextProviderException( + f"File not found while trying to read '{filepath}' " + f"triggered by `{instance}`." + ) + if os.path.isdir(filepath): + raise ContextProviderException( + f"Cannot read directory '{filepath}' triggered by `{instance}`. " + f"Only files are supported." + ) + if os.path.splitext(filepath)[1] not in SUPPORTED_EXTS: + raise ContextProviderException( + f"Cannot read unsupported file type '{filepath}' triggered by `{instance}`. " + f"Supported file extensions are: {', '.join(SUPPORTED_EXTS)}." + ) + try: + with open(filepath) as f: + content = f.read() + except PermissionError: + raise ContextProviderException( + f"Permission denied while trying to read '{filepath}' " + f"triggered by `{instance}`." + ) + return FILE_CONTEXT_TEMPLATE.format( + filepath=filepath, + content=self._process_file(content, filepath), + ) + + def _process_file(self, content: str, filepath: str): + if filepath.endswith(".ipynb"): + nb = nbformat.reads(content, as_version=4) + return "\n\n".join([cell.source for cell in nb.cells]) + return content + + def get_filepaths(self, message: HumanChatMessage) -> List[str]: + filepaths = [] + for instance in self._find_instances(message.prompt): + filepath = instance.partition(":")[2] + if not os.path.isabs(filepath): + filepath = os.path.join(self.base_dir, filepath) + filepaths.append(filepath) + return filepaths diff --git a/packages/jupyter-ai/jupyter_ai/context_providers/learned.py b/packages/jupyter-ai/jupyter_ai/context_providers/learned.py new file mode 100644 index 000000000..72f81ef41 --- /dev/null +++ b/packages/jupyter-ai/jupyter_ai/context_providers/learned.py @@ -0,0 +1,50 @@ +from typing import List + +from jupyter_ai.models import HumanChatMessage +from jupyter_ai.chat_handlers.learn import Retriever + +from .base import BaseContextProvider +from .file import FileContextProvider + + +FILE_CHUNK_TEMPLATE = """ +Snippet from file: {filepath} +``` +{content} +``` +""".strip() + + +class LearnedContextProvider(BaseContextProvider): + id = "learned" + description = "Include learned context" + remove_from_prompt = True + header = "Following are snippets from potentially relevant files:" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.retriever = Retriever(learn_chat_handler=self.chat_handlers["/learn"]) + + async def make_context_prompt(self, message: HumanChatMessage) -> str: + if not self.retriever or not self._find_instances(message.prompt): + return "" + query = self._clean_prompt(message.body) + docs = await self.retriever.ainvoke(query) + excluded = self._get_repeated_files(message) + context = "\n\n".join( + [ + FILE_CHUNK_TEMPLATE.format( + filepath=d.metadata["path"], content=d.page_content + ) + for d in docs + if d.metadata["path"] not in excluded and d.page_content + ] + ) + return 
self.header + "\n" + context + + def _get_repeated_files(self, message: HumanChatMessage) -> List[str]: + # don't include files that are already provided by the file context provider + file_context_provider = self.context_providers.get("file") + if isinstance(file_context_provider, FileContextProvider): + return file_context_provider.get_filepaths(message) + return [] diff --git a/packages/jupyter-ai/jupyter_ai/extension.py b/packages/jupyter-ai/jupyter_ai/extension.py index 34b484546..c76ebc412 100644 --- a/packages/jupyter-ai/jupyter_ai/extension.py +++ b/packages/jupyter-ai/jupyter_ai/extension.py @@ -22,6 +22,7 @@ HelpChatHandler, LearnChatHandler, ) +from .context_providers import FileContextProvider, LearnedContextProvider from .completions.handlers import DefaultInlineCompletionHandler from .config_manager import ConfigManager from .handlers import ( @@ -32,6 +33,7 @@ ModelProviderHandler, RootChatHandler, SlashCommandsInfoHandler, + AutocompleteOptionsHandler, ) from .history import BoundedChatHistory @@ -58,6 +60,7 @@ class AiExtension(ExtensionApp): (r"api/ai/chats/?", RootChatHandler), (r"api/ai/chats/history?", ChatHistoryHandler), (r"api/ai/chats/slash_commands?", SlashCommandsInfoHandler), + (r"api/ai/chats/autocomplete_options?", AutocompleteOptionsHandler), (r"api/ai/providers?", ModelProviderHandler), (r"api/ai/providers/embeddings?", EmbeddingsModelProviderHandler), (r"api/ai/completion/inline/?", DefaultInlineCompletionHandler), @@ -285,6 +288,10 @@ def initialize_settings(self): eps = entry_points() + # Create empty context providers dict to be filled later. + # This is created early to use as kwargs for chat handlers. + self.settings["jai_context_providers"] = {} + # initialize chat handlers chat_handler_eps = eps.select(group="jupyter_ai.chat_handlers") chat_handlers = {} @@ -301,6 +308,7 @@ def initialize_settings(self): "preferred_dir": self.serverapp.contents_manager.preferred_dir, "help_message_template": self.help_message_template, "chat_handlers": chat_handlers, + "context_providers": self.settings["jai_context_providers"], } default_chat_handler = DefaultChatHandler(**chat_handler_kwargs) clear_chat_handler = ClearChatHandler(**chat_handler_kwargs) @@ -376,6 +384,58 @@ def initialize_settings(self): # bind chat handlers to settings self.settings["jai_chat_handlers"] = chat_handlers + # initialize context providers + context_providers = self.settings["jai_context_providers"] + context_providers_kwargs = { + "log": self.log, + "config_manager": self.settings["jai_config_manager"], + "model_parameters": self.settings["model_parameters"], + "chat_history": self.settings["chat_history"], + "llm_chat_memory": self.settings["llm_chat_memory"], + "root_dir": self.serverapp.root_dir, + "dask_client_future": self.settings["dask_client_future"], + "model_parameters": self.settings["model_parameters"], + "preferred_dir": self.serverapp.contents_manager.preferred_dir, + "chat_handlers": self.settings["jai_chat_handlers"], + "context_providers": self.settings["jai_context_providers"], + } + context_providers_clses = [ + FileContextProvider, + LearnedContextProvider, + ] + context_providers_eps = eps.select(group="jupyter_ai.context_providers") + for context_provider_ep in context_providers_eps: + try: + context_provider = context_provider_ep.load() + except Exception as err: + self.log.error( + f"Unable to load context provider class from entry point `{context_provider_ep.name}`: " + + f"Unexpected {err=}, {type(err)=}" + ) + continue + 
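Reviewer note: `FileContextProvider._process_file` flattens notebooks to plain cell sources before interpolating them into `FILE_CONTEXT_TEMPLATE`. A small sketch of that nbformat round-trip, not part of the patch, assuming `nbformat` is installed:

```python
# Sketch: notebook flattening as done by _process_file.
import nbformat

nb = nbformat.v4.new_notebook(
    cells=[
        nbformat.v4.new_markdown_cell("# Analysis"),
        nbformat.v4.new_code_cell("print('hello')"),
    ]
)
content = nbformat.writes(nb)  # what open(...).read() would return for a .ipynb

parsed = nbformat.reads(content, as_version=4)
print("\n\n".join(cell.source for cell in parsed.cells))
# -> "# Analysis\n\nprint('hello')"
```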
diff --git a/packages/jupyter-ai/jupyter_ai/extension.py b/packages/jupyter-ai/jupyter_ai/extension.py
index 34b484546..c76ebc412 100644
--- a/packages/jupyter-ai/jupyter_ai/extension.py
+++ b/packages/jupyter-ai/jupyter_ai/extension.py
@@ -22,6 +22,7 @@
     HelpChatHandler,
     LearnChatHandler,
 )
+from .context_providers import FileContextProvider, LearnedContextProvider
 from .completions.handlers import DefaultInlineCompletionHandler
 from .config_manager import ConfigManager
 from .handlers import (
@@ -32,6 +33,7 @@
     ModelProviderHandler,
     RootChatHandler,
     SlashCommandsInfoHandler,
+    AutocompleteOptionsHandler,
 )
 from .history import BoundedChatHistory
 
@@ -58,6 +60,7 @@ class AiExtension(ExtensionApp):
         (r"api/ai/chats/?", RootChatHandler),
         (r"api/ai/chats/history?", ChatHistoryHandler),
         (r"api/ai/chats/slash_commands?", SlashCommandsInfoHandler),
+        (r"api/ai/chats/autocomplete_options?", AutocompleteOptionsHandler),
         (r"api/ai/providers?", ModelProviderHandler),
         (r"api/ai/providers/embeddings?", EmbeddingsModelProviderHandler),
         (r"api/ai/completion/inline/?", DefaultInlineCompletionHandler),
@@ -285,6 +288,10 @@ def initialize_settings(self):
 
         eps = entry_points()
 
+        # Create empty context providers dict to be filled later.
+        # This is created early to use as kwargs for chat handlers.
+        self.settings["jai_context_providers"] = {}
+
         # initialize chat handlers
         chat_handler_eps = eps.select(group="jupyter_ai.chat_handlers")
         chat_handlers = {}
@@ -301,6 +308,7 @@ def initialize_settings(self):
             "preferred_dir": self.serverapp.contents_manager.preferred_dir,
             "help_message_template": self.help_message_template,
             "chat_handlers": chat_handlers,
+            "context_providers": self.settings["jai_context_providers"],
         }
         default_chat_handler = DefaultChatHandler(**chat_handler_kwargs)
         clear_chat_handler = ClearChatHandler(**chat_handler_kwargs)
@@ -376,6 +384,58 @@ def initialize_settings(self):
         # bind chat handlers to settings
         self.settings["jai_chat_handlers"] = chat_handlers
 
+        # initialize context providers
+        context_providers = self.settings["jai_context_providers"]
+        context_providers_kwargs = {
+            "log": self.log,
+            "config_manager": self.settings["jai_config_manager"],
+            "model_parameters": self.settings["model_parameters"],
+            "chat_history": self.settings["chat_history"],
+            "llm_chat_memory": self.settings["llm_chat_memory"],
+            "root_dir": self.serverapp.root_dir,
+            "dask_client_future": self.settings["dask_client_future"],
+            "preferred_dir": self.serverapp.contents_manager.preferred_dir,
+            "chat_handlers": self.settings["jai_chat_handlers"],
+            "context_providers": self.settings["jai_context_providers"],
+        }
+        context_providers_clses = [
+            FileContextProvider,
+            LearnedContextProvider,
+        ]
+        context_providers_eps = eps.select(group="jupyter_ai.context_providers")
+        for context_provider_ep in context_providers_eps:
+            try:
+                context_provider = context_provider_ep.load()
+            except Exception as err:
+                self.log.error(
+                    f"Unable to load context provider class from entry point `{context_provider_ep.name}`: "
+                    + f"Unexpected {err=}, {type(err)=}"
+                )
+                continue
+            context_providers_clses.append(context_provider)
+
+        for context_provider in context_providers_clses:
+            if context_provider.id in context_providers:
+                self.log.error(
+                    f"Unable to register context provider `{context_provider.id}` because it already exists"
+                )
+                continue
+
+            if context_provider.is_command and not re.match(
+                r"^[a-zA-Z0-9_]+$", context_provider.id
+            ):
+                self.log.error(
+                    f"Context provider `{context_provider.id}` has an invalid ID; "
+                    + "it must contain only letters, numbers, and underscores"
+                )
+                continue
+
+            context_providers[context_provider.id] = context_provider(
+                **context_providers_kwargs
+            )
+            self.log.info(f"Registered context provider `{context_provider.id}`.")
+
         # show help message at server start
         self._show_help_message()
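Reviewer note: third-party providers are discovered through the `jupyter_ai.context_providers` entry point group read above. A hypothetical plugin package could register one like this (package and module names are invented for illustration):

```python
# setup.py of a hypothetical plugin package
from setuptools import setup

setup(
    name="jupyter-ai-weather-context",
    version="0.1.0",
    py_modules=["weather_context"],
    entry_points={
        "jupyter_ai.context_providers": [
            # must resolve to a BaseContextProvider subclass; its `id` must be
            # unique and, for commands, match ^[a-zA-Z0-9_]+$
            "weather = weather_context:WeatherContextProvider",
        ],
    },
)
```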
diff --git a/packages/jupyter-ai/jupyter_ai/handlers.py b/packages/jupyter-ai/jupyter_ai/handlers.py
index 7c4f16e63..6a1df7c03 100644
--- a/packages/jupyter-ai/jupyter_ai/handlers.py
+++ b/packages/jupyter-ai/jupyter_ai/handlers.py
@@ -33,6 +33,8 @@
     ListProvidersResponse,
     ListSlashCommandsEntry,
     ListSlashCommandsResponse,
+    ListOptionsEntry,
+    ListOptionsResponse,
     Message,
     PendingMessage,
     UpdateConfigRequest,
@@ -43,6 +45,7 @@
     from jupyter_ai_magics.providers import BaseProvider
 
     from .history import BoundedChatHistory
+    from .context_providers import BaseContextProvider
 
 
 class ChatHistoryHandler(BaseAPIHandler):
@@ -571,3 +574,101 @@ def get(self):
         # sort slash commands by slash id and deliver the response
         response.slash_commands.sort(key=lambda sc: sc.slash_id)
         self.finish(response.json())
+
+
+class AutocompleteOptionsHandler(BaseAPIHandler):
+    """Lists the autocomplete options currently available to the user."""
+
+    @property
+    def config_manager(self) -> ConfigManager:
+        return self.settings["jai_config_manager"]
+
+    @property
+    def context_providers(self) -> Dict[str, "BaseContextProvider"]:
+        return self.settings["jai_context_providers"]
+
+    @property
+    def chat_handlers(self) -> Dict[str, "BaseChatHandler"]:
+        return self.settings["jai_chat_handlers"]
+
+    @web.authenticated
+    def get(self):
+        response = ListOptionsResponse()
+
+        # if no LLM is selected, return an empty response
+        if not self.config_manager.lm_provider:
+            self.finish(response.json())
+            return
+
+        response.options = (
+            self._get_slash_command_options() + self._get_context_provider_options()
+        )
+        self.finish(response.json())
+
+    @web.authenticated
+    def post(self):
+        try:
+            data = self.get_json_body()
+            context_provider = self.context_providers.get(data["id"])
+            arg_prefix = data["arg_prefix"]
+            response = ListOptionsResponse()
+
+            if not context_provider:
+                self.finish(response.json())
+                return
+
+            response.options = context_provider.get_arg_options(arg_prefix)
+            self.finish(response.json())
+        except (ValidationError, WriteConflictError, KeyEmptyError) as e:
+            self.log.exception(e)
+            raise HTTPError(500, str(e)) from e
+        except ValueError as e:
+            self.log.exception(e)
+            raise HTTPError(500, str(e.cause) if hasattr(e, "cause") else str(e))
+        except Exception as e:
+            self.log.exception(e)
+            raise HTTPError(
+                500, "Unexpected error occurred while listing autocomplete options."
+            ) from e
+
+    def _get_slash_command_options(self) -> List[ListOptionsEntry]:
+        options = []
+        for id, chat_handler in self.chat_handlers.items():
+            # filter out any chat handler that is not a slash command
+            if (
+                id == "default"
+                or chat_handler.routing_type.routing_method != "slash_command"
+            ):
+                continue
+
+            # hint the type of this attribute
+            routing_type: SlashCommandRoutingType = chat_handler.routing_type
+
+            # filter out any chat handler that is unsupported by the current LLM
+            if (
+                "/" + routing_type.slash_id
+                in self.config_manager.lm_provider.unsupported_slash_commands
+            ):
+                continue
+
+            options.append(
+                ListOptionsEntry.from_command(
+                    type="/", id=routing_type.slash_id, description=chat_handler.help
+                )
+            )
+        options.sort(key=lambda opt: opt.id)
+        return options
+
+    def _get_context_provider_options(self) -> List[ListOptionsEntry]:
+        options = [
+            ListOptionsEntry.from_command(
+                type="@",
+                id=context_provider.id,
+                description=context_provider.description,
+                requires_arg=context_provider.requires_arg,
+            )
+            for context_provider in self.context_providers.values()
+            if context_provider.is_command
+        ]
+        options.sort(key=lambda opt: opt.id)
+        return options
diff --git a/packages/jupyter-ai/jupyter_ai/models.py b/packages/jupyter-ai/jupyter_ai/models.py
index bda4d3421..0a3476bb4 100644
--- a/packages/jupyter-ai/jupyter_ai/models.py
+++ b/packages/jupyter-ai/jupyter_ai/models.py
@@ -263,3 +263,37 @@ class ListSlashCommandsEntry(BaseModel):
 
 class ListSlashCommandsResponse(BaseModel):
     slash_commands: List[ListSlashCommandsEntry] = []
+
+
+class ListOptionsEntry(BaseModel):
+    type: Literal["/", "@"]
+    id: str
+    label: str
+    description: str
+
+    @classmethod
+    def from_command(
+        cls,
+        type: Literal["/", "@"],
+        id: str,
+        description: str,
+        requires_arg: bool = False,
+    ):
+        label = type + id + (":" if requires_arg else " ")
+        return cls(type=type, id=id, description=description, label=label)
+
+    @classmethod
+    def from_arg(
+        cls,
+        type: Literal["/", "@"],
+        id: str,
+        description: str,
+        arg: str,
+        is_complete: bool = True,
+    ):
+        label = type + id + ":" + arg + (" " if is_complete else "")
+        return cls(type=type, id=id, description=description, label=label)
+
+
+class ListOptionsResponse(BaseModel):
+    options: List[ListOptionsEntry] = []
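Reviewer note: the `label` produced by the two constructors above is the literal text the frontend inserts into the chat input on selection, so the trailing colon/space conventions matter. A quick illustration:

```python
# Labels produced by ListOptionsEntry's constructors.
from jupyter_ai.models import ListOptionsEntry

print(ListOptionsEntry.from_command(type="/", id="ask", description="...").label)
# "/ask "    <- trailing space: the command is complete
print(ListOptionsEntry.from_command(
    type="@", id="file", description="...", requires_arg=True
).label)
# "@file:"   <- trailing colon: an argument must follow
print(ListOptionsEntry.from_arg(
    type="@", id="file", description="Directory", arg="src/", is_complete=False
).label)
# "@file:src/"  <- no trailing space: a directory keeps completion open
```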
diff --git a/packages/jupyter-ai/jupyter_ai/tests/test_context_providers.py b/packages/jupyter-ai/jupyter_ai/tests/test_context_providers.py
new file mode 100644
index 000000000..b7c97596e
--- /dev/null
+++ b/packages/jupyter-ai/jupyter_ai/tests/test_context_providers.py
@@ -0,0 +1,63 @@
+import logging
+from unittest import mock
+
+import pytest
+from jupyter_ai.context_providers import FileContextProvider
+from jupyter_ai.config_manager import ConfigManager
+from jupyter_ai.history import BoundedChatHistory
+from jupyter_ai.models import (
+    ChatClient,
+    HumanChatMessage,
+    Persona,
+)
+
+
+@pytest.fixture
+def human_chat_message() -> HumanChatMessage:
+    chat_client = ChatClient(
+        id=0, username="test", initials="test", name="test", display_name="test"
+    )
+    prompt = (
+        "@file:test1.py @file @file:dir/test2.md test test\n"
+        "@file:/dir/test3.png test@file:test4.py"
+    )
+    return HumanChatMessage(
+        id="test",
+        time=0,
+        body=prompt,
+        prompt=prompt,
+        client=chat_client,
+    )
+
+
+@pytest.fixture
+def file_context_provider() -> FileContextProvider:
+    config_manager = mock.create_autospec(ConfigManager)
+    config_manager.persona = Persona(name="test", avatar_route="test")
+    return FileContextProvider(
+        log=logging.getLogger(__name__),
+        config_manager=config_manager,
+        model_parameters={},
+        chat_history=[],
+        llm_chat_memory=BoundedChatHistory(k=2),
+        root_dir="",
+        preferred_dir="",
+        dask_client_future=None,
+        chat_handlers={},
+        context_providers={},
+    )
+
+
+def test_find_instances(file_context_provider, human_chat_message):
+    expected = ["@file:test1.py", "@file:dir/test2.md", "@file:/dir/test3.png"]
+    instances = file_context_provider._find_instances(human_chat_message.prompt)
+    assert instances == expected
+
+
+def test_replace_prompt(file_context_provider, human_chat_message):
+    expected = (
+        "'test1.py' @file 'dir/test2.md' test test\n"
+        "'/dir/test3.png' test@file:test4.py"
+    )
+    prompt = file_context_provider.replace_prompt(human_chat_message.prompt)
+    assert prompt == expected
diff --git a/packages/jupyter-ai/jupyter_ai/tests/test_handlers.py b/packages/jupyter-ai/jupyter_ai/tests/test_handlers.py
index c1ca7b098..a94c3fbf8 100644
--- a/packages/jupyter-ai/jupyter_ai/tests/test_handlers.py
+++ b/packages/jupyter-ai/jupyter_ai/tests/test_handlers.py
@@ -76,6 +76,7 @@ def broadcast_message(message: Message) -> None:
         dask_client_future=None,
         help_message_template=DEFAULT_HELP_MESSAGE_TEMPLATE,
         chat_handlers={},
+        context_providers={},
     )
diff --git a/packages/jupyter-ai/src/components/chat-input.tsx b/packages/jupyter-ai/src/components/chat-input.tsx
index ddc2d4209..b341653e7 100644
--- a/packages/jupyter-ai/src/components/chat-input.tsx
+++ b/packages/jupyter-ai/src/components/chat-input.tsx
@@ -38,12 +38,6 @@ type ChatInputProps = {
   personaName: string;
 };
 
-type SlashCommandOption = {
-  id: string;
-  label: string;
-  description: string;
-};
-
 /**
  * List of icons per slash command, shown in the autocomplete popup.
  *
@@ -65,9 +59,9 @@ const DEFAULT_SLASH_COMMAND_ICONS: Record<string, JSX.Element> = {
 /**
  * Renders an option shown in the slash command autocomplete.
  */
-function renderSlashCommandOption(
+function renderAutocompleteOption(
   optionProps: React.HTMLAttributes<HTMLLIElement>,
-  option: SlashCommandOption
+  option: AiService.AutocompleteOption
 ): JSX.Element {
   const icon =
     option.id in DEFAULT_SLASH_COMMAND_ICONS
@@ -99,8 +93,14 @@ export function ChatInput(props: ChatInputProps): JSX.Element {
   const [input, setInput] = useState('');
-  const [slashCommandOptions, setSlashCommandOptions] = useState<
-    SlashCommandOption[]
+  const [autocompleteOptions, setAutocompleteOptions] = useState<
+    AiService.AutocompleteOption[]
+  >([]);
+  const [autocompleteCommandOptions, setAutocompleteCommandOptions] = useState<
+    AiService.AutocompleteOption[]
+  >([]);
+  const [autocompleteArgOptions, setAutocompleteArgOptions] = useState<
+    AiService.AutocompleteOption[]
   >([]);
   const [currSlashCommand, setCurrSlashCommand] = useState<string | null>(null);
   const activeCell = useActiveCellContext();
@@ -110,20 +110,45 @@ export function ChatInput(props: ChatInputProps): JSX.Element {
    * initial mount to populate the slash command autocomplete.
    */
   useEffect(() => {
-    async function getSlashCommands() {
-      const slashCommands = (await AiService.listSlashCommands())
-        .slash_commands;
-      setSlashCommandOptions(
-        slashCommands.map(slashCommand => ({
-          id: slashCommand.slash_id,
-          label: '/' + slashCommand.slash_id + ' ',
-          description: slashCommand.description
-        }))
-      );
+    async function getAutocompleteCommandOptions() {
+      const response = await AiService.listAutocompleteOptions();
+      setAutocompleteCommandOptions(response.options);
     }
-    getSlashCommands();
+    getAutocompleteCommandOptions();
   }, []);
 
+  useEffect(() => {
+    async function getAutocompleteArgOptions() {
+      let options: AiService.AutocompleteOption[] = [];
+      const lastWord = input.split(/\s+/).pop() || '';
+      if (lastWord.startsWith('@') && lastWord.includes(':')) {
+        const [id, argPrefix] = lastWord.split(':', 2);
+        // get the option that matches the command
+        const option = autocompleteCommandOptions.find(
+          option => option.id === id.slice(1) && option.type === '@'
+        );
+        if (option) {
+          const response = await AiService.listAutocompleteArgOptions({
+            id: option.id,
+            arg_prefix: argPrefix
+          });
+          options = response.options;
+        }
+      }
+      setAutocompleteArgOptions(options);
+    }
+    getAutocompleteArgOptions();
+  }, [autocompleteCommandOptions, input]);
+
+  // Combine the fixed options with the argument options
+  useEffect(() => {
+    if (autocompleteArgOptions.length > 0) {
+      setAutocompleteOptions(autocompleteArgOptions);
+    } else {
+      setAutocompleteOptions(autocompleteCommandOptions);
+    }
+  }, [autocompleteCommandOptions, autocompleteArgOptions]);
+
   // whether any option is highlighted in the slash command autocomplete
   const [highlighted, setHighlighted] = useState(false);
 
@@ -153,7 +178,7 @@ export function ChatInput(props: ChatInputProps): JSX.Element {
    * chat input. Close the autocomplete when the user clears the chat input.
    */
   useEffect(() => {
-    if (input === '/') {
+    if (input === '/' || input.endsWith('@')) {
       setOpen(true);
       return;
     }
@@ -255,12 +280,39 @@ export function ChatInput(props: ChatInputProps): JSX.Element {
     currSlashCommand
   };
 
+  function filterAutocompleteOptions(
+    options: AiService.AutocompleteOption[],
+    inputValue: string
+  ): AiService.AutocompleteOption[] {
+    const lastWord = inputValue.split(/\s+/).pop() || '';
+    if (
+      (lastWord.startsWith('/') && lastWord === inputValue) ||
+      lastWord.startsWith('@')
+    ) {
+      return options.filter(option => option.label.startsWith(lastWord));
+    }
+    return [];
+  }
+
   return (
     <Autocomplete
+      filterOptions={(options, { inputValue }) => {
+        return filterAutocompleteOptions(options, inputValue);
+      }}
+      onChange={(_, option) => {
+        const value = typeof option === 'string' ? option : option.label;
+        let matchLength = 0;
+        for (let i = 1; i <= value.length; i++) {
+          if (input.endsWith(value.slice(0, i))) {
+            matchLength = i;
+          }
+        }
+        setInput(input + value.slice(matchLength));
+      }}
       onInputChange={(_, newValue: string) => {
         setInput(newValue);
       }}
@@ -273,12 +325,16 @@ export function ChatInput(props: ChatInputProps): JSX.Element {
           setHighlighted(!!highlightedOption);
         }
       }
-      onClose={() => setOpen(false)}
+      onClose={(_, reason) => {
+        if (reason !== 'selectOption' || input.endsWith(' ')) {
+          setOpen(false);
+        }
+      }}
       // set this to an empty string to prevent the last selected slash
       // command from being shown in blue
       value=""
       open={open}
-      options={slashCommandOptions}
+      options={autocompleteOptions}
       // hide default extra right padding in the text field
       disableClearable
       // ensure the autocomplete popup always renders on top
@@ -292,7 +348,7 @@ export function ChatInput(props: ChatInputProps): JSX.Element {
           }
         }
       }}
-      renderOption={renderSlashCommandOption}
+      renderOption={renderAutocompleteOption}
       ListboxProps={{
         sx: {
           '& .MuiAutocomplete-option': {
diff --git a/packages/jupyter-ai/src/handler.ts b/packages/jupyter-ai/src/handler.ts
index b2f4efc16..64771e158 100644
--- a/packages/jupyter-ai/src/handler.ts
+++ b/packages/jupyter-ai/src/handler.ts
@@ -315,4 +315,38 @@ export namespace AiService {
   export async function listSlashCommands(): Promise<ListSlashCommandsResponse> {
     return requestAPI<ListSlashCommandsResponse>('chats/slash_commands');
   }
+
+  export type AutocompleteOption = {
+    type: '/' | '@';
+    id: string;
+    description: string;
+    label: string;
+  };
+
+  export type ListAutocompleteOptionsResponse = {
+    options: AutocompleteOption[];
+  };
+
+  export type AutocompleteArgOptionsRequest = {
+    id: string;
+    arg_prefix: string;
+  };
+
+  export async function listAutocompleteOptions(): Promise<ListAutocompleteOptionsResponse> {
+    return requestAPI<ListAutocompleteOptionsResponse>(
+      'chats/autocomplete_options'
+    );
+  }
+
+  export async function listAutocompleteArgOptions(
+    request: AutocompleteArgOptionsRequest
+  ): Promise<ListAutocompleteOptionsResponse> {
+    return requestAPI<ListAutocompleteOptionsResponse>(
+      'chats/autocomplete_options',
+      {
+        method: 'POST',
+        body: JSON.stringify(request)
+      }
+    );
+  }
 }
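Reviewer note: end to end, the two endpoints added in this patch can be exercised against a running server. A hypothetical manual check using `requests`; the URL and token are placeholders, and the GET returns an empty list until a language model is selected in Jupyter AI:

```python
# Sketch: manually exercising the autocomplete endpoints.
import requests

BASE_URL = "http://localhost:8888"  # assumption: local server, no base_url prefix
TOKEN = "<your-server-token>"       # assumption: token auth enabled

headers = {"Authorization": f"token {TOKEN}"}

# GET: slash commands plus context provider commands
resp = requests.get(f"{BASE_URL}/api/ai/chats/autocomplete_options", headers=headers)
print(resp.json())  # e.g. {"options": [{"type": "@", "id": "file", ...}, ...]}

# POST: argument completions for "@file:src/"
resp = requests.post(
    f"{BASE_URL}/api/ai/chats/autocomplete_options",
    headers=headers,
    json={"id": "file", "arg_prefix": "src/"},
)
print(resp.json())
```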