shikijs · fuma-nama · Nov 14, 2024 · Nov 14, 2024 · Nov 14, 2024 · Nov 14, 2024
diff --git a/packages/transformers/src/index.ts b/packages/transformers/src/index.ts
@@ -10,4 +10,3 @@ export * from './transformers/remove-line-breaks'
 export * from './transformers/remove-notation-escape'
 export * from './transformers/render-whitespace'
 export * from './transformers/style-to-class'
-export * from './utils'
diff --git a/packages/transformers/src/shared/highlight-word.ts b/packages/transformers/src/shared/highlight-word.ts
@@ -24,6 +24,7 @@ function getTextContent(element: ElementContent): string {
  * @param ignoredElement
  * @param index highlight beginning index
  * @param len highlight length
+ * @param className class name to add to highlighted nodes
  */
 function highlightRange(
   this: ShikiTransformerContext,
@@ -64,14 +65,14 @@ function highlightRange(
   }
 }
 
-function hasOverlap(range1: [number, number], range2: [ number, number]): boolean {
+function hasOverlap(range1: [number, number], range2: [number, number]): boolean {
   return (range1[0] <= range2[1]) && (range1[1]) >= range2[0]
 }
 
 function separateToken(span: Element, textNode: Text, index: number, len: number): [
-  before: Element | undefined,
-  med: Element,
-  after: Element | undefined,
+    before: Element | undefined,
+    med: Element,
+    after: Element | undefined,
 ] {
   const text = textNode.value
 

diff --git a/packages/transformers/src/shared/notation-transformer.ts b/packages/transformers/src/shared/notation-transformer.ts
@@ -0,0 +1,85 @@
+import type { Element, Text } from 'hast'
+import type { ShikiTransformer, ShikiTransformerContext } from 'shiki'
+import { legacyClearEndCommentPrefix, parseComments, type ParsedComments } from './parse-comments'
+
+/**
+ * Create a transformer that looks for a notation (`regex`) inside code
+ * comments and hands every occurrence to `onMatch`.
+ *
+ * Comments are parsed once per `code` node and cached on
+ * `code.data._shiki_notation`; transformers created by this function that run
+ * on the same node reuse that list and consume it in place.
+ *
+ * @param name name of the returned transformer
+ * @param regex pattern searched for in the content of each comment
+ * @param onMatch called with `this` set to the transformer context for every
+ * match. It receives the arguments of the `String.prototype.replace` callback
+ * (full match first, then the capture groups), the line holding the comment,
+ * the comment token, all line elements and the target line index. That index
+ * is the comment's own line, or the following line when the comment is alone
+ * on its line and `legacy` is off. Return `true` to strip the matched text
+ * from the comment, `false` to keep it.
+ * @param legacy enable the legacy behaviour: comments are parsed with the
+ * `legacy` flag of `parseComments`, notations always target the comment's own
+ * line, and a dangling comment prefix is cleaned up after a match
+ */
+export function createCommentNotationTransformer(
+  name: string,
+  regex: RegExp,
+  onMatch: (
+    this: ShikiTransformerContext,
+    match: string[],
+    line: Element,
+    commentNode: Element,
+    lines: Element[],
+    index: number
+  ) => boolean,
+  legacy = false,
+): ShikiTransformer {
+  return {
+    name,
+    code(code) {
+      const lines = code.children.filter(i => i.type === 'element')
+      const linesToRemove: (Element | Text)[] = []
+
+      // parse once, then share the result with the other notation transformers
+      code.data ??= {} as any
+      const data = code.data as {
+        _shiki_notation?: ParsedComments
+      }
+
+      data._shiki_notation ??= parseComments(lines, ['jsx', 'tsx'].includes(this.options.lang), legacy)
+      const parsed = data._shiki_notation
+
+      for (const comment of parsed) {
+        // content already consumed entirely (it is reset to '' below)
+        if (comment.info[1].length === 0)
+          continue
+
+        // a JSX comment sits between `{` and `}`, hence 3 children instead of 1
+        const isLineCommentOnly = comment.line.children.length === (comment.isJsxStyle ? 3 : 1)
+        let lineIdx = lines.indexOf(comment.line)
+        // a comment alone on its line annotates the line that follows it
+        if (isLineCommentOnly && !legacy)
+          lineIdx++
+
+        let replaced = false
+        comment.info[1] = comment.info[1].replace(regex, (...match) => {
+          if (onMatch.call(this, match, comment.line, comment.token, lines, lineIdx)) {
+            replaced = true
+            return ''
+          }
+
+          return match[0]
+        })
+
+        if (!replaced)
+          continue
+
+        if (legacy) {
+          comment.info[1] = legacyClearEndCommentPrefix(comment.info[1])
+        }
+
+        const isEmpty = comment.info[1].trim().length === 0
+        // ignore comment node
+        if (isEmpty)
+          comment.info[1] = ''
+
+        if (isEmpty && isLineCommentOnly) {
+          linesToRemove.push(comment.line)
+        }
+        else if (isEmpty && comment.isJsxStyle) {
+          // drop the comment token together with the surrounding `{` and `}`
+          comment.line.children.splice(comment.line.children.indexOf(comment.token) - 1, 3)
+        }
+        else if (isEmpty) {
+          comment.line.children.splice(comment.line.children.indexOf(comment.token), 1)
+        }
+        else {
+          // something is left in the comment: write the shortened text back
+          const head = comment.token.children[0]
+
+          if (head.type === 'text') {
+            head.value = comment.info.join('')
+          }
+        }
+      }
+
+      for (const line of linesToRemove) {
+        const index = code.children.indexOf(line)
+        // not attached anymore: `splice(-1, 1)` would delete the last child
+        if (index === -1)
+          continue
+
+        // when the removed line is followed by a `\n` text node (the line
+        // break that belonged to it), remove that node as well so that no
+        // blank line is left behind
+        const next = code.children[index + 1]
+        const removeCount = next?.type === 'text' && next.value === '\n' ? 2 : 1
+        code.children.splice(index, removeCount)
+      }
+    },
+  }
+}
diff --git a/packages/transformers/src/shared/parse-comments.ts b/packages/transformers/src/shared/parse-comments.ts
@@ -0,0 +1,127 @@
+import type { Element, ElementContent } from 'hast'
+
+export type ParsedComments = {
+  line: Element
+  token: Element
+  info: [prefix: string, content: string, suffix?: string]
+  isJsxStyle: boolean
+}[]
+
+/**
+ * Comment syntaxes recognised by `matchToken`, tried in order: the first
+ * pattern that matches wins.
+ *
+ * Each entry is `[re, endOfLine]`. Some comment formats have to be located at
+ * the end of the line, hence `matchToken` skips entries whose `endOfLine` is
+ * `true` for every token that is not the last one of its line.
+ *
+ * Capture groups are `(prefix)(content)` plus an optional `(suffix)` for
+ * comment styles that have a closing delimiter.
+ */
+const matchers: [re: RegExp, endOfLine: boolean][] = [
+  [/^(<!--)(.+)(-->)$/, false],
+  [/^(\/\*)(.+)(\*\/)$/, false],
+  [/^(\/\/|["'#]|;{1,2}|%{1,2}|--)(.*)$/, true],
+  /**
+   * for the continuation lines of multi-line comments, which start with `*`
+   * (like the lines of this very comment)
+   */
+  [/^(\*)(.+)$/, true],
+]
+
+/**
+ * Collect the comment tokens found in the given lines.
+ *
+ * By default only the last token of a line is inspected; with `jsx` the scan
+ * starts one token earlier, and with `legacy` every token is inspected.
+ *
+ * @param lines line tokens
+ * @param jsx enable JSX parsing
+ * @param legacy support legacy behaviours, force to parse all tokens.
+ * @returns one entry per token recognised by `matchToken`, in document order
+ */
+export function parseComments(lines: Element[], jsx: boolean, legacy = false): ParsedComments {
+  const out: ParsedComments = []
+
+  for (const line of lines) {
+    const tokens = line.children
+    const lastIndex = tokens.length - 1
+
+    let first = lastIndex
+    if (legacy)
+      first = 0
+    else if (jsx)
+      // one step further for JSX as comment is inside curly brackets
+      first = lastIndex - 1
+
+    for (let i = Math.max(first, 0); i <= lastIndex; i++) {
+      const token = tokens[i]
+      if (token.type !== 'element')
+        continue
+
+      const head = token.children.at(0)
+      if (head?.type !== 'text')
+        continue
+
+      const isLast = i === lastIndex
+      const info = matchToken(head.value, isLast)
+      if (!info)
+        continue
+
+      // a JSX-style comment needs a neighbour on both sides: `{` before, `}` after
+      const isJsxStyle = jsx
+        && !isLast
+        && i !== 0
+        && isValue(tokens[i - 1], '{')
+        && isValue(tokens[i + 1], '}')
+
+      out.push({ info, line, token, isJsxStyle })
+    }
+  }
+
+  return out
+}
+
+/**
+ * Check whether `element` is an element node whose first child is a text
+ * node equal to `value` once surrounding whitespace is trimmed.
+ *
+ * @param element node to inspect
+ * @param value expected (trimmed) text
+ * @returns `false` for non-element nodes, for elements without children and
+ * for elements whose first child is not a text node
+ */
+function isValue(element: ElementContent, value: string): boolean {
+  if (element.type !== 'element')
+    return false
+  // an element without children has no first child; reading `.type` on
+  // `undefined` would throw, so treat it as "no match"
+  const text = element.children.at(0)
+  if (text?.type !== 'text')
+    return false
+
+  return text.value.trim() === value
+}
+
+/**
+ * Try to read `text` as a comment and split it into its parts.
+ *
+ * The patterns in `matchers` are anchored and allow no leading or trailing
+ * spaces, so the surrounding whitespace is set aside before matching and
+ * re-attached to the returned prefix / suffix afterwards.
+ *
+ * @param text text value of comment node
+ * @param isLast whether the token is located at the end of line
+ * @returns `[prefix, content, suffix]`, or `undefined` when no matcher
+ * applies; `suffix` is `undefined` when the matched pattern captured no
+ * closing delimiter
+ */
+function matchToken(text: string, isLast: boolean): [prefix: string, content: string, suffix?: string] | undefined {
+  // keep the whitespace exactly as written (tabs included) instead of
+  // re-creating it from spaces, so a rewritten comment keeps its indentation
+  const withoutLeading = text.trimStart()
+  const leading = text.slice(0, text.length - withoutLeading.length)
+  const trimmed = withoutLeading.trimEnd()
+  const trailing = withoutLeading.slice(trimmed.length)
+
+  for (const [matcher, endOfLine] of matchers) {
+    // end-of-line comment formats are only tried on the last token of a line
+    if (endOfLine && !isLast)
+      continue
+
+    const result = matcher.exec(trimmed)
+    if (!result)
+      continue
+
+    return [
+      leading + result[1],
+      result[2],
+      result[3] ? result[3] + trailing : undefined,
+    ]
+  }
+
+  return undefined
+}
+
+/**
+ * Remove an empty comment prefix at line end, e.g. `// `.
+ *
+ * The first comment prefix found in `text` is considered: when nothing but
+ * whitespace follows it, `text` is cut right before that prefix; in every
+ * other case `text` is returned unchanged.
+ */
+export function legacyClearEndCommentPrefix(text: string): string {
+  const found = /(?:\/\/|["'#]|;{1,2}|%{1,2}|--)(.*)$/.exec(text)
+
+  // no prefix at all, or the prefix still carries content: leave it alone
+  if (!found || found[1].trim().length !== 0)
+    return text
+
+  return text.slice(0, found.index)
+}
diff --git a/packages/transformers/src/transformers/meta-highlight.ts b/packages/transformers/src/transformers/meta-highlight.ts
@@ -12,8 +12,8 @@ export function parseMetaHighlightString(meta: string): number[] | null {
       const num = v.split('-').map(v => Number.parseInt(v, 10))
       if (num.length === 1)
         return [num[0]]
-      else
-        return Array.from({ length: num[1] - num[0] + 1 }, (_, i) => i + num[0])
+
+      return Array.from({ length: num[1] - num[0] + 1 }, (_, i) => i + num[0])
     })
   return lines
 }
@@ -45,8 +45,13 @@ export function transformerMetaHighlight(
       if (!this.options.meta?.__raw) {
         return
       }
-      ;(this.meta as any)[symbol] ||= parseMetaHighlightString(this.options.meta.__raw)
-      const lines: number[] = (this.meta as any)[symbol] || []
+      const meta = this.meta as {
+        [symbol]: number[] | null
+      }
+
+      meta[symbol] ??= parseMetaHighlightString(this.options.meta.__raw)
+      const lines: number[] = meta[symbol] ?? []
+
       if (lines.includes(line))
         this.addClassToHast(node, className)
       return node

diff --git a/packages/transformers/src/transformers/notation-diff.ts b/packages/transformers/src/transformers/notation-diff.ts
@@ -14,6 +14,8 @@ export interface TransformerNotationDiffOptions {
    * Class added to the <pre> element when the current code has diff
    */
   classActivePre?: string
+
+  legacy?: boolean
 }
 
 /**
@@ -26,6 +28,7 @@ export function transformerNotationDiff(
     classLineAdd = 'diff add',
     classLineRemove = 'diff remove',
     classActivePre = 'has-diff',
+    legacy,
   } = options
 
   return transformerNotationMap(
@@ -34,6 +37,7 @@ export function transformerNotationDiff(
         '++': classLineAdd,
         '--': classLineRemove,
       },
+      legacy,
       classActivePre,
     },
     '@shikijs/transformers:notation-diff',

diff --git a/packages/transformers/src/transformers/notation-error-level.ts b/packages/transformers/src/transformers/notation-error-level.ts
@@ -7,6 +7,8 @@ export interface TransformerNotationErrorLevelOptions {
    * Class added to the <pre> element when the current code has diff
    */
   classActivePre?: string
+
+  legacy?: boolean
 }
 
 /**
@@ -21,12 +23,14 @@ export function transformerNotationErrorLevel(
       warning: ['highlighted', 'warning'],
     },
     classActivePre = 'has-highlighted',
+    legacy,
   } = options
 
   return transformerNotationMap(
     {
       classMap,
       classActivePre,
+      legacy,
     },
     '@shikijs/transformers:notation-error-level',
   )

diff --git a/packages/transformers/src/transformers/notation-focus.ts b/packages/transformers/src/transformers/notation-focus.ts
@@ -10,6 +10,8 @@ export interface TransformerNotationFocusOptions {
    * Class added to the root element when the code has focused lines
    */
   classActivePre?: string
+
+  legacy?: boolean
 }
 
 /**
@@ -21,6 +23,7 @@ export function transformerNotationFocus(
   const {
     classActiveLine = 'focused',
     classActivePre = 'has-focused',
+    legacy,
   } = options
 
   return transformerNotationMap(
@@ -29,6 +32,7 @@ export function transformerNotationFocus(
         focus: classActiveLine,
       },
       classActivePre,
+      legacy,
     },
     '@shikijs/transformers:notation-focus',
   )

diff --git a/packages/transformers/src/transformers/notation-highlight-word.ts b/packages/transformers/src/transformers/notation-highlight-word.ts
@@ -1,6 +1,6 @@
 import type { ShikiTransformer } from 'shiki'
 import { highlightWordInLine } from '../shared/highlight-word'
-import { createCommentNotationTransformer } from '../utils'
+import { createCommentNotationTransformer } from '../shared/notation-transformer'
 
 export interface TransformerNotationWordHighlightOptions {
   /**
@@ -11,6 +11,8 @@ export interface TransformerNotationWordHighlightOptions {
    * Class added to the root element when the code has highlighted words
    */
   classActivePre?: string
+
+  legacy?: boolean
 }
 
 export function transformerNotationWordHighlight(
@@ -19,27 +21,25 @@ export function transformerNotationWordHighlight(
   const {
     classActiveWord = 'highlighted-word',
     classActivePre = undefined,
+    legacy,
   } = options
 
   return createCommentNotationTransformer(
     '@shikijs/transformers:notation-highlight-word',
-    // comment-start             | marker    | word           | range | comment-end
-    /^\s*(?:\/\/|\/\*|<!--|#)\s+\[!code word:((?:\\.|[^:\]])+)(:\d+)?\]\s*(?:\*\/|-->)?/,
+    /\s*\[!code word:((?:\\.|[^:\]])+)(:\d+)?\]/,
     function ([_, word, range], _line, comment, lines, index) {
       const lineNum = range ? Number.parseInt(range.slice(1), 10) : lines.length
 
       // escape backslashes
       word = word.replace(/\\(.)/g, '$1')
-
-      lines
-        // Don't include the comment itself
-        .slice(index + 1, index + 1 + lineNum)
-        .forEach(line => highlightWordInLine.call(this, line, comment, word, classActiveWord))
+      for (let i = index; i < Math.min(index + lineNum, lines.length); i++) {
+        highlightWordInLine.call(this, lines[i], comment, word, classActiveWord)
+      }
 
       if (classActivePre)
         this.addClassToHast(this.pre, classActivePre)
       return true
     },
-    true, // remove empty lines
+    legacy,
   )
 }