From 3734d46ca95577f32859b1cebfab33c69c379f75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Krassowski?=
 <5832902+krassowski@users.noreply.github.com>
Date: Fri, 29 Nov 2024 11:36:14 +0000
Subject: [PATCH] Fix regression in standard error rendering performance
 (#16975)

* Add performance and caching tests for stdout/stderr

* Add linkification cache for stdout/stderr rendering

* Fix interaction with search highlights

* Use deep cloning so that link label is preserved

* Apply suggestions from code review

Co-authored-by: M Bussonnier <bussonniermatthias@gmail.com>

* Fix tests

---------

Co-authored-by: M Bussonnier <bussonniermatthias@gmail.com>
---
 packages/rendermime/src/renderers.ts       | 181 +++++++++++++++-----
 packages/rendermime/test/factories.spec.ts | 190 ++++++++++++++++++++-
 2 files changed, 331 insertions(+), 40 deletions(-)
diff --git a/packages/rendermime/src/renderers.ts b/packages/rendermime/src/renderers.ts
index 70bd3489f6d0..65c2a31331b5 100644
--- a/packages/rendermime/src/renderers.ts
+++ b/packages/rendermime/src/renderers.ts
@@ -787,6 +787,24 @@ function* alignedNodes<T extends Node, U extends Node>(
  * @returns A promise which resolves when rendering is complete.
  */
 export function renderText(options: renderText.IRenderOptions): Promise<void> {
+  renderTextual(options, {
+    checkWeb: true,
+    checkPaths: false
+  });
+
+  // Return the rendered promise.
+  return Promise.resolve(undefined);
+}
+
+/**
+ * Render the textual representation into a host node.
+ *
+ * Implements the shared logic for `renderText` and `renderError`.
+ */
+function renderTextual(
+  options: renderText.IRenderOptions,
+  autoLinkOptions: IAutoLinkOptions
+): void {
   // Unpack the options.
   const { host, sanitizer, source } = options;
 
@@ -801,16 +819,60 @@ export function renderText(options: renderText.IRenderOptions): Promise<void> {
 
   const preTextContent = pre.textContent;
 
+  const cacheStoreOptions = [];
+  if (autoLinkOptions.checkWeb) {
+    cacheStoreOptions.push('web');
+  }
+  if (autoLinkOptions.checkPaths) {
+    cacheStoreOptions.push('paths');
+  }
+  const cacheStoreKey = cacheStoreOptions.join('-');
+  let cacheStore = Private.autoLinkCache.get(cacheStoreKey);
+  if (!cacheStore) {
+    cacheStore = new WeakMap();
+    Private.autoLinkCache.set(cacheStoreKey, cacheStore);
+  }
+
   let ret: HTMLPreElement;
   if (preTextContent) {
     // Note: only text nodes and span elements should be present after sanitization in the `<pre>` element.
-    const linkedNodes =
-      sanitizer.getAutolink?.() ?? true
-        ? autolink(preTextContent, {
-            checkWeb: true,
-            checkPaths: false
-          })
-        : [document.createTextNode(content)];
+    let linkedNodes: (HTMLAnchorElement | Text)[];
+    if (sanitizer.getAutolink?.() ?? true) {
+      const cache = getApplicableLinkCache(
+        cacheStore.get(host),
+        preTextContent
+      );
+      if (cache) {
+        const { cachedNodes: fromCache, addedText } = cache;
+        const newAdditions = autolink(addedText, autoLinkOptions);
+        const lastInCache = fromCache[fromCache.length - 1];
+        const firstNewNode = newAdditions[0];
+
+        if (lastInCache instanceof Text && firstNewNode instanceof Text) {
+          const joiningNode = lastInCache;
+          joiningNode.data += firstNewNode.data;
+          linkedNodes = [
+            ...fromCache.slice(0, -1),
+            joiningNode,
+            ...newAdditions.slice(1)
+          ];
+        } else {
+          linkedNodes = [...fromCache, ...newAdditions];
+        }
+      } else {
+        linkedNodes = autolink(preTextContent, autoLinkOptions);
+      }
+      cacheStore.set(host, {
+        preTextContent,
+        // Clone the nodes before storing them in the cache in case if another component
+        // attempts to modify (e.g. dispose of) them - which is the case for search highlights!
+        linkedNodes: linkedNodes.map(
+          node => node.cloneNode(true) as HTMLAnchorElement | Text
+        )
+      });
+    } else {
+      linkedNodes = [document.createTextNode(content)];
+    }
 
     const preNodes = Array.from(pre.childNodes) as (Text | HTMLSpanElement)[];
     ret = mergeNodes(preNodes, linkedNodes);
@@ -819,9 +881,6 @@ export function renderText(options: renderText.IRenderOptions): Promise<void> {
   }
 
   host.appendChild(ret);
-
-  // Return the rendered promise.
-  return Promise.resolve(undefined);
 }
 
 /**
@@ -854,6 +913,68 @@ export namespace renderText {
   }
 }
 
+interface IAutoLinkCacheEntry {
+  preTextContent: string;
+  linkedNodes: (HTMLAnchorElement | Text)[];
+}
+
+/**
+ * Return the information from the cache that can be used given the cache entry and current text.
+ * If the cache is invalid given the current text (or cannot be used) `null` is returned.
+ */
+function getApplicableLinkCache(
+  cachedResult: IAutoLinkCacheEntry | undefined,
+  preTextContent: string
+): {
+  cachedNodes: IAutoLinkCacheEntry['linkedNodes'];
+  addedText: string;
+} | null {
+  if (!cachedResult) {
+    return null;
+  }
+  if (preTextContent.length < cachedResult.preTextContent.length) {
+    // If the new content is shorter than the cached content
+    // we cannot use the cache as we only support appending.
+    return null;
+  }
+  let addedText = preTextContent.substring(cachedResult.preTextContent.length);
+  let cachedNodes = cachedResult.linkedNodes;
+  const lastCachedNode =
+    cachedResult.linkedNodes[cachedResult.linkedNodes.length - 1];
+
+  // Only use cached nodes if:
+  // - the last cached node is a text node
+  // - the new content starts with a new line
+  // - the old content ends with a new line
+  if (
+    cachedResult.preTextContent.endsWith('\n') ||
+    addedText.startsWith('\n')
+  ) {
+    // Second or third condition is met, we can use the cached nodes
+    // (this is a no-op, we just continue execution).
+  } else if (lastCachedNode instanceof Text) {
+    // The first condition is met, we can use the cached nodes,
+    // but first we remove the Text node to re-analyse its text.
+    // This is required when we cached `aaa www.one.com bbb www.`
+    // and the incoming addition is `two.com`. We can still
+    // use text node `aaa ` and anchor node `www.one.com`, but
+    // we need to pass `bbb www.` + `two.com` through linkify again.
+    cachedNodes = cachedNodes.slice(0, -1);
+    addedText = lastCachedNode.textContent + addedText;
+  } else {
+    return null;
+  }
+
+  // Finally check if text has not changed.
+  if (!preTextContent.startsWith(cachedResult.preTextContent)) {
+    return null;
+  }
+  return {
+    cachedNodes,
+    addedText
+  };
+}
+
 /**
  * Render error into a host node.
  *
@@ -865,37 +986,13 @@ export function renderError(
   options: renderError.IRenderOptions
 ): Promise<void> {
   // Unpack the options.
-  const { host, linkHandler, sanitizer, resolver, source } = options;
+  const { host, linkHandler, resolver } = options;
 
-  // Create the HTML content.
-  const content = sanitizer.sanitize(Private.ansiSpan(source), {
-    allowedTags: ['span']
+  renderTextual(options, {
+    checkWeb: true,
+    checkPaths: true
   });
 
-  // Set the sanitized content for the host node.
-  const pre = document.createElement('pre');
-  pre.innerHTML = content;
-
-  const preTextContent = pre.textContent;
-
-  let ret: HTMLPreElement;
-  if (preTextContent) {
-    // Note: only text nodes and span elements should be present after sanitization in the `<pre>` element.
-    const linkedNodes =
-      sanitizer.getAutolink?.() ?? true
-        ? autolink(preTextContent, {
-            checkWeb: true,
-            checkPaths: true
-          })
-        : [document.createTextNode(content)];
-
-    const preNodes = Array.from(pre.childNodes) as (Text | HTMLSpanElement)[];
-    ret = mergeNodes(preNodes, linkedNodes);
-  } else {
-    ret = document.createElement('pre');
-  }
-  host.appendChild(ret);
-
   // Patch the paths if a resolver is available.
   let promise: Promise<void>;
   if (resolver) {
@@ -1012,6 +1109,14 @@ export namespace renderError {
  * The namespace for module implementation details.
  */
 namespace Private {
+  /**
+   * Cache for auto-linking results to provide better performance when streaming outputs.
+   */
+  export const autoLinkCache = new Map<
+    string,
+    WeakMap<HTMLElement, IAutoLinkCacheEntry>
+  >();
+
   /**
    * Eval the script tags contained in a host populated by `innerHTML`.
    *
diff --git a/packages/rendermime/test/factories.spec.ts b/packages/rendermime/test/factories.spec.ts
index ffe1e59dd108..d9d3db2463e2 100644
--- a/packages/rendermime/test/factories.spec.ts
+++ b/packages/rendermime/test/factories.spec.ts
@@ -538,6 +538,25 @@ describe('rendermime/factories', () => {
     };
 
     describe('#createRenderer()', () => {
+      // Mock creation of DOM nodes to distinguish cached
+      /// (cloned) nodes from nodes created from scratch.
+      beforeEach(() => {
+        const originalCloneNode = Node.prototype.cloneNode;
+
+        Node.prototype.cloneNode = function (...args: any) {
+          const clonedNode = originalCloneNode.apply(this, args);
+
+          // Annotate as a node created by cloning.
+          clonedNode.wasCloned = true;
+
+          return clonedNode;
+        };
+      });
+
+      afterEach(() => {
+        jest.restoreAllMocks();
+      });
+
       it('should output the correct HTML', async () => {
         const f = errorRendererFactory;
         const mimeType = 'application/vnd.jupyter.stderr';
@@ -570,6 +589,173 @@ describe('rendermime/factories', () => {
         );
       });
 
+      it.each([
+        // Note: timeouts are set to 3.5 times more the local performance to allow for slower runs on CI
+        //
+        // Local benchmarks:
+        // - without linkify cache: 12.5s
+        // - with cache: 1.1s
+        [
+          'when new content arrives line by line',
+          '\n' + 'X'.repeat(5000),
+          1100 * 3.5
+        ],
+        // Local benchmarks:
+        // - without cache: 3.8s
+        // - with cache: 0.8s
+        [
+          'when new content is added to the same line',
+          'test.com ' + 'X'.repeat(2500) + ' www.',
+          800 * 3.5
+        ]
+      ])('should be fast %s', async (_, newContent, timeout) => {
+        let source = '';
+        const mimeType = 'application/vnd.jupyter.stderr';
+
+        const model = createModel(mimeType, source);
+        const w = errorRendererFactory.createRenderer({ mimeType, ...options });
+
+        const start = performance.now();
+        for (let i = 0; i < 25; i++) {
+          source += newContent;
+          model.setData({
+            data: {
+              [mimeType]: source
+            }
+          });
+          await w.renderModel(model);
+        }
+        const end = performance.now();
+
+        expect(end - start).toBeLessThan(timeout);
+      });
+
+      it.each([
+        ['arrives in a new line', 'www.example.com', '\n a new line of text'],
+        ['arrives after a new line', 'www.example.com\n', 'a new line of text'],
+        ['arrives after a text node', 'www.example.com next line', ' of text'],
+        ['arrives after a text node', 'www.example.com\nnext line', ' of text']
+      ])(
+        'should use cached links if new content %s',
+        async (_, oldSource, addition) => {
+          const mimeType = 'application/vnd.jupyter.stderr';
+          let source = oldSource;
+          const model = createModel(mimeType, source);
+          const w = errorRendererFactory.createRenderer({
+            mimeType,
+            ...defaultOptions
+          });
+          // Perform an initial render to populate the cache.
+          await w.renderModel(model);
+          const before = w.node.innerHTML;
+          const cachedLink = w.node.querySelector('a');
+          expect(cachedLink).toBe(w.node.childNodes[0].childNodes[0]);
+
+          // Update the source.
+          source += addition;
+          model.setData({
+            data: {
+              [mimeType]: source
+            }
+          });
+
+          // Perform a second render which should use the cache.
+          await w.renderModel(model);
+          const after = w.node.innerHTML;
+          const linkAfter = w.node.querySelector('a');
+
+          // The contents of the node should be updated with the new line.
+          expect(before).not.toEqual(after);
+          expect(after).toContain('line of text');
+
+          expect(cachedLink).not.toBe(null);
+          expect(cachedLink).not.toHaveProperty('wasCloned');
+
+          // If cached links were reused those would be cloned
+          expect(linkAfter).not.toBe(null);
+          expect(linkAfter).toEqual(cachedLink);
+          expect(linkAfter).toHaveProperty('wasCloned', true);
+        }
+      );
+
+      it('should not use cached links if the new content appends to the link', async () => {
+        const mimeType = 'application/vnd.jupyter.stderr';
+        let source = 'www.example.co';
+        const model = createModel(mimeType, source);
+        const w = errorRendererFactory.createRenderer({
+          mimeType,
+          ...defaultOptions
+        });
+        // Perform an initial render to populate the cache.
+        await w.renderModel(model);
+        const before = w.node.innerHTML;
+        const cachedLink = w.node.querySelector('a');
+
+        // Update the source.
+        source += 'm';
+        model.setData({
+          data: {
+            [mimeType]: source
+          }
+        });
+
+        // Perform a second render.
+        await w.renderModel(model);
+        const after = w.node.innerHTML;
+        const linkAfter = w.node.querySelector('a');
+
+        // The contents of the node should be updated with the new line.
+        expect(before).not.toEqual(after);
+
+        expect(cachedLink).not.toBe(null);
+        expect(cachedLink!.textContent).toEqual('www.example.co');
+        expect(cachedLink).not.toHaveProperty('wasCloned');
+
+        // If cached links were reused those would be cloned
+        expect(linkAfter).not.toBe(null);
+        expect(linkAfter!.textContent).toEqual('www.example.com');
+        expect(linkAfter).not.toHaveProperty('wasCloned');
+      });
+
+      it('should use partial cache if a link is created by addition of a new fragment', async () => {
+        const mimeType = 'application/vnd.jupyter.stderr';
+        let source = 'aaa www.one.com bbb www.';
+        const model = createModel(mimeType, source);
+        const w = errorRendererFactory.createRenderer({
+          mimeType,
+          ...defaultOptions
+        });
+        // Perform an initial render to populate the cache.
+        await w.renderModel(model);
+        const cachedTextNode = w.node.childNodes[0].childNodes[0];
+        const linksBefore = w.node.querySelectorAll('a');
+        expect(linksBefore).toHaveLength(1);
+
+        // Update the source.
+        source += 'two.com';
+        model.setData({
+          data: {
+            [mimeType]: source
+          }
+        });
+
+        // Perform a second render.
+        await w.renderModel(model);
+        const textNodeAfter = w.node.childNodes[0].childNodes[0];
+        const linksAfter = w.node.querySelectorAll('a');
+
+        // It should not use the second text node (`bbb www.`) from cache and instead
+        // it should fragment properly linkify the second link
+        expect(linksAfter).toHaveLength(2);
+
+        expect(cachedTextNode).toBeInstanceOf(Text);
+        expect(cachedTextNode).not.toHaveProperty('wasCloned');
+
+        // If cached nodes were reused those would be cloned
+        expect(textNodeAfter).toEqual(cachedTextNode);
+        expect(textNodeAfter).toHaveProperty('wasCloned', true);
+      });
+
       it('should autolink a single known file path', async () => {
         const f = errorRendererFactory;
         const urls = [
@@ -642,10 +828,10 @@ describe('rendermime/factories', () => {
         const source = 'www.example.com';
         const expected =
           '<pre><a href="https://www.example.com" rel="noopener" target="_blank">www.example.com</a></pre>';
-        const f = textRendererFactory;
+        const f = errorRendererFactory;
         const mimeType = 'application/vnd.jupyter.stderr';
         const model = createModel(mimeType, source);
-        const w = f.createRenderer({ mimeType, ...options });
+        const w = f.createRenderer({ mimeType, ...defaultOptions });
         await w.renderModel(model);
         expect(w.node.innerHTML).toBe(expected);
       });