From a98ceefc0bcbceb71f920aa0c65c82cb00dbaaf9 Mon Sep 17 00:00:00 2001 From: "David L. Qiu" Date: Thu, 31 Oct 2024 10:54:18 -0700 Subject: [PATCH] escape `$` only if alone and not in a code element --- .../src/components/rendermime-markdown.tsx | 70 ++++++++++++++++--- 1 file changed, 62 insertions(+), 8 deletions(-) diff --git a/packages/jupyter-ai/src/components/rendermime-markdown.tsx b/packages/jupyter-ai/src/components/rendermime-markdown.tsx index 0401618e2..d0204d31f 100644 --- a/packages/jupyter-ai/src/components/rendermime-markdown.tsx +++ b/packages/jupyter-ai/src/components/rendermime-markdown.tsx @@ -25,19 +25,68 @@ type RendermimeMarkdownProps = { /** * Escapes LaTeX delimiters and single dollar signs by adding extra backslashes. - * Required for proper rendering of LaTeX markup by `@jupyterlab/rendermime`, - * and allows for `$` to be used literally to denote quantities of USD. - * - * The Jupyter AI system prompt should explicitly request that the LLM not use - * `$` as an inline math delimiter. This is the default behavior. + * Required for proper rendering of LaTeX markup by `@jupyterlab/rendermime`. */ function escapeLatexDelimiters(text: string) { return text .replace(/\\\(/g, '\\\\(') .replace(/\\\)/g, '\\\\)') .replace(/\\\[/g, '\\\\[') - .replace(/\\\]/g, '\\\\]') - .replace(/\$/g, '\\\\$'); + .replace(/\\\]/g, '\\\\]'); +} + +/** + * Type predicate function that determines whether a given DOM Node is a Text + * node. + */ +function isTextNode(node: Node | null): node is Text { + return node?.nodeType === Node.TEXT_NODE; +} + +/** + * Escapes all `$` symbols present in an HTML element except those within the + * following elements: `pre`, `code`, `samp`, `kbd`. + * + * This prevents `$` symbols from being used as inline math delimiters, allowing + * `$` symbols to be used literally to denote quantities of USD. This does not + * escape literal `$` within elements that display their contents literally, + * like code elements. This overrides JupyterLab's default rendering of MarkDown + * w/ LaTeX. + * + * The Jupyter AI system prompt should explicitly request that the LLM not use + * `$` as an inline math delimiter. This is the default behavior. + */ +function escapeDollarSymbols(el: HTMLElement) { + // Get all text nodes that are not within pre, code, samp, or kbd elements + const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, { + acceptNode: node => { + const isInSkippedElements = node.parentElement?.closest( + 'pre, code, samp, kbd' + ); + return isInSkippedElements + ? NodeFilter.FILTER_SKIP + : NodeFilter.FILTER_ACCEPT; + } + }); + + // Collect all valid text nodes in an array. + const textNodes: Text[] = []; + let currentNode: Node | null; + while ((currentNode = walker.nextNode())) { + if (isTextNode(currentNode)) { + textNodes.push(currentNode); + } + } + + // Replace each `$` symbol with `\$` for each text node, unless there is + // another `$` symbol adjacent. Examples: + // - `$10 - $5` => `\$10 - \$5` (escaped) + // - `$$ \infty $$` => `$$ \infty $$` (unchanged) + textNodes.forEach(node => { + if (node.textContent) { + node.textContent = node.textContent.replace(/(? { const renderContent = async () => { + // initialize mime model const mdStr = escapeLatexDelimiters(props.markdownStr); const model = props.rmRegistry.createModel({ data: { [MD_MIME_TYPE]: mdStr } }); + // step 1: render markdown await renderer.renderModel(model); - props.rmRegistry.latexTypesetter?.typeset(renderer.node); if (!renderer.node) { throw new Error( 'Rendermime was unable to render Markdown content within a chat message. Please report this upstream to Jupyter AI on GitHub.' ); } + // step 2: render LaTeX via MathJax, while escaping single dollar symbols. + escapeDollarSymbols(renderer.node); + props.rmRegistry.latexTypesetter?.typeset(renderer.node); + // insert the rendering into renderingContainer if not yet inserted if (renderingContainer.current !== null && !renderingInserted.current) { renderingContainer.current.appendChild(renderer.node);