Skip to content

Commit

Permalink
escape $ only if alone and not in a code element
Browse files Browse the repository at this point in the history
  • Loading branch information
dlqqq committed Oct 31, 2024
1 parent cab6156 commit a98ceef
Showing 1 changed file with 62 additions and 8 deletions.
70 changes: 62 additions & 8 deletions packages/jupyter-ai/src/components/rendermime-markdown.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,68 @@ type RendermimeMarkdownProps = {

/**
 * Escapes the LaTeX delimiters `\(`, `\)`, `\[` and `\]` by doubling the
 * backslash in front of each bracket. Required for proper rendering of LaTeX
 * markup by `@jupyterlab/rendermime`.
 *
 * Dollar symbols are intentionally left untouched by this function; `$` is
 * handled separately on the rendered DOM by `escapeDollarSymbols()`.
 *
 * @param text - Raw Markdown source.
 * @returns The source with a second backslash inserted before every
 * backslash-prefixed `(`, `)`, `[` or `]`.
 */
function escapeLatexDelimiters(text: string): string {
  return text
    .replace(/\\\(/g, '\\\\(')
    .replace(/\\\)/g, '\\\\)')
    .replace(/\\\[/g, '\\\\[')
    .replace(/\\\]/g, '\\\\]');
}

/**
 * Type guard that narrows a DOM `Node` to `Text`.
 *
 * @param node - The node to inspect; may be `null`.
 * @returns `true` only when `node` is present and its `nodeType` equals
 * `Node.TEXT_NODE`.
 */
function isTextNode(node: Node | null): node is Text {
  if (node === null || node === undefined) {
    return false;
  }
  return node.nodeType === Node.TEXT_NODE;
}

/**
 * Rewrites, in place, the text nodes under `el` so that every lone `$` becomes
 * `\$`. Text nodes whose parent element is, or is nested inside, a `pre`,
 * `code`, `samp` or `kbd` element are left untouched.
 *
 * Escaping lone `$` symbols keeps them from being treated as inline math
 * delimiters, so `$` can be written literally to denote quantities of USD.
 * Elements that display their contents verbatim (such as code elements) keep
 * their `$` symbols as-is. This overrides JupyterLab's default rendering of
 * Markdown with LaTeX.
 *
 * The Jupyter AI system prompt should explicitly request that the LLM not use
 * `$` as an inline math delimiter. This is the default behavior.
 *
 * @param el - Root element whose descendant text nodes are rewritten.
 */
function escapeDollarSymbols(el: HTMLElement) {
  // Walk text nodes only, skipping any that sit inside a literal-content
  // element (pre, code, samp, kbd).
  const textWalker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, {
    acceptNode(candidate) {
      if (candidate.parentElement?.closest('pre, code, samp, kbd')) {
        return NodeFilter.FILTER_SKIP;
      }
      return NodeFilter.FILTER_ACCEPT;
    }
  });

  // Collect the accepted text nodes before rewriting any of them.
  const targets: Text[] = [];
  for (
    let visited = textWalker.nextNode();
    visited;
    visited = textWalker.nextNode()
  ) {
    if (isTextNode(visited)) {
      targets.push(visited);
    }
  }

  // Prefix a backslash to each `$` that has no `$` directly before or after
  // it. Examples:
  // - `$10 - $5` => `\$10 - \$5` (escaped)
  // - `$$ \infty $$` => `$$ \infty $$` (unchanged)
  for (const target of targets) {
    const content = target.textContent;
    if (!content) {
      continue;
    }
    target.textContent = content.replace(/(?<!\$)\$(?!\$)/g, '\\$');
  }
}

function RendermimeMarkdownBase(props: RendermimeMarkdownProps): JSX.Element {
Expand All @@ -63,19 +112,24 @@ function RendermimeMarkdownBase(props: RendermimeMarkdownProps): JSX.Element {
*/
useEffect(() => {
const renderContent = async () => {
// initialize mime model
const mdStr = escapeLatexDelimiters(props.markdownStr);
const model = props.rmRegistry.createModel({
data: { [MD_MIME_TYPE]: mdStr }
});

// step 1: render markdown
await renderer.renderModel(model);
props.rmRegistry.latexTypesetter?.typeset(renderer.node);
if (!renderer.node) {
throw new Error(
'Rendermime was unable to render Markdown content within a chat message. Please report this upstream to Jupyter AI on GitHub.'
);
}

// step 2: render LaTeX via MathJax, while escaping single dollar symbols.
escapeDollarSymbols(renderer.node);
props.rmRegistry.latexTypesetter?.typeset(renderer.node);

// insert the rendering into renderingContainer if not yet inserted
if (renderingContainer.current !== null && !renderingInserted.current) {
renderingContainer.current.appendChild(renderer.node);
Expand Down

0 comments on commit a98ceef

Please sign in to comment.