Skip to content

Commit

Permalink
fix: don't fail on HTML comments
Browse files Browse the repository at this point in the history
  • Loading branch information
petyosi committed Jan 25, 2024
1 parent 4cde7f8 commit 209edce
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 3 deletions.
2 changes: 1 addition & 1 deletion docs/error-handling.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ The markdown format can be complex due to its loose nature. You may integrate th

The editor component uses the [MDAST library](https://github.com/syntax-tree/mdast-util-from-markdown) to parse the markdown content. Although it's quite forgiving, certain content can cause the parsing to fail, in which case the editor will remain empty. To obtain more information about the error, you can pass a callback to the `onError` prop - the callback will receive a payload that includes the error message and the source markdown that triggered it.

### Parse errors caused by HTML-like formatting (e.g. HTML comments, or links surrounded by angle brackets)
### Parse errors caused by HTML-like formatting (e.g. links surrounded by angle brackets) or JSX expression syntax

To handle common basic HTML formatting (e.g. `u` tags), the default parsing includes the [mdast-util-mdx-jsx extension](https://github.com/syntax-tree/mdast-util-mdx-jsx). In some cases, this can cause the parsing to fail. You can disable this extension by setting the `suppressHtmlProcessing` prop to `true`, but you will lose the ability to use HTML-like formatting in your markdown.

Expand Down
2 changes: 2 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@
"micromark-extension-mdx-jsx": "^3.0.0",
"micromark-extension-mdx-md": "^2.0.0",
"micromark-extension-mdxjs": "^3.0.0",
"micromark-factory-space": "^2.0.0",
"micromark-util-symbol": "^2.0.0",
"react-hook-form": "^7.44.2",
"unidiff": "^1.0.2"
},
Expand Down
6 changes: 6 additions & 0 deletions src/@types/micromark-util-symbol/index.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import { codes as Codes, types as Types } from 'micromark-util-symbol/lib/default'

// Re-declares the `codes` and `types` exports of 'micromark-util-symbol', giving every
// property the exact type it has on the objects exported from 'micromark-util-symbol/lib/default'.
// NOTE(review): presumably needed because the package root does not resolve to usable typings
// in this project's setup — confirm before removing.
declare module 'micromark-util-symbol' {
// Same keys and per-key value types as the `codes` object from lib/default.
export const codes: { [k in keyof typeof Codes]: (typeof Codes)[k] }
// Same keys and per-key value types as the `types` object from lib/default.
export const types: { [k in keyof typeof Types]: (typeof Types)[k] }
}
5 changes: 5 additions & 0 deletions src/examples/html.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ import { useCellValues } from '@mdxeditor/gurx'
const markdownWithSpan = `
# Hello World
foo
<!-- comment -->
bar
A paragraph with <span style="color: red" class="some">some red text <span style="color: blue">with some blue nesting.</span> in here.</span> in it.
`
export function HTag() {
Expand Down
174 changes: 174 additions & 0 deletions src/mdastUtilHtmlComment.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// typed version of https://github.com/slorber/remark-comment/blob/slorber/multiline-comment-bug/index.js
import type { Config } from 'mdast-util-from-markdown/lib'
import { factorySpace } from 'micromark-factory-space'
import { markdownLineEnding } from 'micromark-util-character'
import { types, codes } from 'micromark-util-symbol'

import type { Code, Extension, Tokenizer } from 'micromark-util-types'

// Augments micromark's `TokenTypeMap` with the token names this file passes to
// `effects.enter` / `effects.exit` in the comment tokenizer.
declare module 'micromark-util-types' {
interface TokenTypeMap {
// Wraps the whole `<!-- ... -->` construct.
comment: 'comment'
// Wraps only the closing `>` of the construct.
commentEnd: 'commentEnd'
// NOTE(review): `data` looks like a token type micromark already ships — verify whether this entry is still required.
data: 'data'
}
}

/**
 * Creates the mdast-util-from-markdown handlers for the `comment` token
 * produced by the tokenizer in this file.
 *
 * The comment's content is always buffered and discarded from the surrounding
 * tree. Only when `_options.ast` is truthy is a `comment` node (empty `value`,
 * text in `commentValue`) entered and immediately exited.
 *
 * @param _options - `ast: true` emits a `comment` node; otherwise comments are dropped.
 * @returns A partial from-markdown config with `enter`/`exit` handlers for `comment`.
 */
export function commentFromMarkdown(_options: { ast?: boolean }): Partial<Config> {
  const handlers: Partial<Config> = {
    canContainEols: ['comment'],
    enter: {
      comment(_token) {
        // Redirect everything inside the comment into a buffer.
        this.buffer()
      }
    },
    exit: {
      comment(token) {
        // The buffer has to be popped even when no node is emitted.
        const buffered = this.resume()
        if (!_options?.ast) {
          return
        }

        // The tokenizer closes its data token just before `>`, so the buffered
        // text still ends with the `--` of `-->`; strip those two characters.
        const commentValue = buffered.slice(0, -2)
        this.enter(
          {
            // @ts-expect-error: hush.
            type: 'comment',
            value: '',
            commentValue
          },
          token
        )
        this.exit(token)
      }
    }
  }

  return handlers
}

// Micromark tokenizer for HTML comments (`<!-- ... -->`).
// On success the whole construct is wrapped in a `comment` token whose content is split into
// `data` chunks (one per line), `lineEnding` / `linePrefix` tokens between lines, and a final
// `commentEnd` token around the closing `>`. Any mismatch returns `nok(code)`.
const tokenize: Tokenizer = (effects, ok, nok) => {
return start

// Entry state: opens the `comment` token and consumes the first code without inspecting it
// (the construct is registered for character code 60, i.e. `<`).
function start(code: Code) {
effects.enter('comment')
effects.consume(code)
return open
}

// After `<`: only `!` may follow.
function open(code: Code) {
if (code === codes.exclamationMark) {
effects.consume(code)
return declarationOpen
}

return nok(code)
}

// After `<!`: expects the first `-` of the opening `--`.
function declarationOpen(code: Code) {
if (code === codes.dash) {
effects.consume(code)
return commentOpen
}

return nok(code)
}

// After `<!-`: expects the second `-` of the opening `--`.
function commentOpen(code: Code) {
if (code === codes.dash) {
effects.consume(code)
return commentStart
}

return nok(code)
}

// Directly after `<!--`: rejects `<!-->`, otherwise starts the first line of content.
function commentStart(code: Code) {
if (code === codes.greaterThan) {
return nok(code)
}

// A line ending right after `<!--`: no `data` token has been opened yet, so none is closed.
if (markdownLineEnding(code)) {
return atLineEnding(code)
}

effects.enter(types.data)

if (code === codes.dash) {
effects.consume(code)
return commentStartDash
}

return comment(code)
}

// After `<!---`: rejects `<!--->`, otherwise continues as regular content.
function commentStartDash(code: Code) {
if (code === codes.greaterThan) {
return nok(code)
}

return comment(code)
}

// Main content state, always entered with a `data` token open.
function comment(code: Code): any {
// End of input before `-->`: not a comment.
if (code === codes.eof) {
return nok(code)
}

// A dash may be the start of the closing `-->`; it stays part of the data token.
if (code === codes.dash) {
effects.consume(code)
return commentClose
}

// Close the current data chunk so the line ending gets its own token.
if (markdownLineEnding(code)) {
effects.exit(types.data)
return atLineEnding(code)
}

effects.consume(code)
return comment
}

// Emits a `lineEnding` token for `code`, then delegates to `factorySpace` with the
// `linePrefix` token type before resuming in `afterPrefix`.
function atLineEnding(code: Code): any {
effects.enter(types.lineEnding)
effects.consume(code)
effects.exit(types.lineEnding)
return factorySpace(effects, afterPrefix, types.linePrefix)
}

// Start of a continuation line: handles consecutive line endings, otherwise opens a new
// `data` chunk and returns to the main content state.
function afterPrefix(code: Code) {
if (markdownLineEnding(code)) {
return atLineEnding(code)
}

effects.enter(types.data)
return comment(code)
}

// After one dash inside the content: a second dash moves to `end`, anything else is content.
function commentClose(code: Code) {
if (code === codes.dash) {
effects.consume(code)
return end
}

return comment(code)
}

// After at least two consecutive dashes inside the content.
function end(code: Code) {
// `-->` found: the data token (which still includes the trailing dashes) is closed first,
// then the `>` is wrapped in its own `commentEnd` token and the construct succeeds.
if (code === codes.greaterThan) {
effects.exit(types.data)
effects.enter('commentEnd') // See https://github.com/leebyron/remark-comment/pull/3#discussion_r1239494357
effects.consume(code)
effects.exit('commentEnd')
effects.exit('comment')
return ok(code)
}

// Further dashes keep the machine in this state, so `--->` also closes.
if (code === codes.dash) {
effects.consume(code)
return end
}

// Not a closer after all; resume regular content handling.
return comment(code)
}
}

/**
 * Micromark syntax extension that recognizes HTML comments (`<!-- ... -->`).
 *
 * The tokenizer is registered under the `<` character code for both flow and
 * text content; the flow variant is additionally marked `concrete`.
 */
export const comment: Extension = {
  // `codes.lessThan` (60) replaces the bare magic number, matching the other `codes.*` checks in this file.
  flow: { [codes.lessThan]: { tokenize, concrete: true } },
  text: { [codes.lessThan]: { tokenize } }
}
5 changes: 3 additions & 2 deletions src/plugins/core/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ import { SharedHistoryPlugin } from './SharedHistoryPlugin'
import { DirectiveDescriptor } from '../directives'
import { CodeBlockEditorDescriptor } from '../codeblock'
import { Directives } from 'mdast-util-directive'
import { comment, commentFromMarkdown } from '../../mdastUtilHtmlComment'
export * from './MdastHTMLNode'
export * from './GenericHTMLNode'
export * from './Icon'
Expand Down Expand Up @@ -858,8 +859,8 @@ export const corePlugin = realmPlugin<{
// Use the JSX and HTML comment extensions to parse HTML
if (!params?.suppressHtmlProcessing) {
r.pubIn({
[addMdastExtension$]: mdxJsxFromMarkdown(),
[addSyntaxExtension$]: [mdxJsx(), mdxMd()],
[addMdastExtension$]: [mdxJsxFromMarkdown(), commentFromMarkdown({ ast: false })],
[addSyntaxExtension$]: [mdxJsx(), mdxMd(), comment],
[addToMarkdownExtension$]: mdxJsxToMarkdown(),
[addImportVisitor$]: MdastHTMLVisitor
})
Expand Down

0 comments on commit 209edce

Please sign in to comment.