From fb4a8336e1f146d8a8f1a1b6cdea8e030a46c9ca Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Thu, 31 Oct 2024 13:49:21 -0700 Subject: [PATCH] Backport PR #1068: Allow `$` to literally denote quantities of USD in chat (#1079) Co-authored-by: david qiu --- .../jupyter_ai_magics/providers.py | 8 ++- .../src/components/rendermime-markdown.tsx | 69 ++++++++++++++++++- 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py b/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py index 8a28c5251..15402dd96 100644 --- a/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py +++ b/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py @@ -55,8 +55,12 @@ You are not a language model, but rather an application built on a foundation model from {provider_name} called {local_model_id}. You are talkative and you provide lots of specific details from the foundation model's context. You may use Markdown to format your response. -Code blocks must be formatted in Markdown. -Math should be rendered with inline TeX markup, surrounded by $. +If your response includes code, they must be enclosed in Markdown fenced code blocks (with triple backticks before and after). +If your response includes mathematical notation, they must be expressed in LaTeX markup and enclosed in LaTeX delimiters. +- Single dollar signs ($) should never be used as delimiters for inline math. +- Valid inline math: `\\( \\infty \\)` +- Valid display math: `\\[ \\infty \\]` +- Invalid inline math: `$\\infty$` If you do not know the answer to a question, answer truthfully by responding that you do not know. The following is a friendly conversation between you and a human. 
""".strip() diff --git a/packages/jupyter-ai/src/components/rendermime-markdown.tsx b/packages/jupyter-ai/src/components/rendermime-markdown.tsx index 976cccb72..4ecdd8745 100644 --- a/packages/jupyter-ai/src/components/rendermime-markdown.tsx +++ b/packages/jupyter-ai/src/components/rendermime-markdown.tsx @@ -24,7 +24,12 @@ type RendermimeMarkdownProps = { }; /** - * Takes \( and returns \\(. Escapes LaTeX delimeters by adding extra backslashes where needed for proper rendering by @jupyterlab/rendermime. + * Escapes backslashes in LaTeX delimiters such that they appear in the DOM + * after the initial MarkDown render. For example, this function takes '\(` and + * returns `\\(`. + * + * Required for proper rendering of MarkDown + LaTeX markup in the chat by + * `ILatexTypesetter`. */ function escapeLatexDelimiters(text: string) { return text @@ -34,6 +39,61 @@ function escapeLatexDelimiters(text: string) { .replace(/\\\]/g, '\\\\]'); } +/** + * Type predicate function that determines whether a given DOM Node is a Text + * node. + */ +function isTextNode(node: Node | null): node is Text { + return node?.nodeType === Node.TEXT_NODE; +} + +/** + * Escapes all `$` symbols present in an HTML element except those within the + * following elements: `pre`, `code`, `samp`, `kbd`. + * + * This prevents `$` symbols from being used as inline math delimiters, allowing + * `$` symbols to be used literally to denote quantities of USD. This does not + * escape literal `$` within elements that display their contents literally, + * like code elements. This overrides JupyterLab's default rendering of MarkDown + * w/ LaTeX. + * + * The Jupyter AI system prompt should explicitly request that the LLM not use + * `$` as an inline math delimiter. This is the default behavior. 
+ */ +function escapeDollarSymbols(el: HTMLElement) { + // Get all text nodes that are not within pre, code, samp, or kbd elements + const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, { + acceptNode: node => { + const isInSkippedElements = node.parentElement?.closest( + 'pre, code, samp, kbd' + ); + return isInSkippedElements + ? NodeFilter.FILTER_SKIP + : NodeFilter.FILTER_ACCEPT; + } + }); + + // Collect all valid text nodes in an array. + const textNodes: Text[] = []; + let currentNode: Node | null; + while ((currentNode = walker.nextNode())) { + if (isTextNode(currentNode)) { + textNodes.push(currentNode); + } + } + + // Replace each `$` symbol with `\$` for each text node, unless there is + // another `$` symbol adjacent or it is already escaped. Examples: + // - `$10 - $5` => `\$10 - \$5` (escaped) + // - `$$ \infty $$` => `$$ \infty $$` (unchanged) + // - `\$10` => `\$10` (unchanged, already escaped) + textNodes.forEach(node => { + if (node.textContent) { + node.textContent = node.textContent.replace(/(?<![$\\])\$(?!\$)/g, '\\$'); + } + }); +} + function RendermimeMarkdownBase(props: RendermimeMarkdownProps): JSX.Element { // create a single renderer object at component mount const [renderer] = useState(() => { @@ -57,19 +117,24 @@ function RendermimeMarkdownBase(props: RendermimeMarkdownProps): JSX.Element { */ useEffect(() => { const renderContent = async () => { + // initialize mime model const mdStr = escapeLatexDelimiters(props.markdownStr); const model = props.rmRegistry.createModel({ data: { [MD_MIME_TYPE]: mdStr } }); + // step 1: render markdown await renderer.renderModel(model); - props.rmRegistry.latexTypesetter?.typeset(renderer.node); if (!renderer.node) { throw new Error( 'Rendermime was unable to render Markdown content within a chat message. Please report this upstream to Jupyter AI on GitHub.' ); } + // step 2: render LaTeX via MathJax, while escaping single dollar symbols. 
+ escapeDollarSymbols(renderer.node); + props.rmRegistry.latexTypesetter?.typeset(renderer.node); + // insert the rendering into renderingContainer if not yet inserted if (renderingContainer.current !== null && !renderingInserted.current) { renderingContainer.current.appendChild(renderer.node);