Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(mdx-loader): Remark plugin to report unused MDX / Markdown anchors links #9512

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/docusaurus-mdx-loader/src/processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import head from './remark/head';
import mermaid from './remark/mermaid';
import transformAdmonitions from './remark/admonitions';
import unusedDirectivesWarning from './remark/unusedDirectives';
import brokenAnchorsWarning from './remark/brokenAnchors';
import codeCompatPlugin from './remark/mdx1Compat/codeCompatPlugin';
import {getFormat} from './format';
import type {WebpackCompilerName} from '@docusaurus/utils';
Expand Down Expand Up @@ -126,6 +127,7 @@ async function createProcessorFactory() {
options.markdownConfig.mdx1Compat.comments ? comment : null,
...(options.remarkPlugins ?? []),
unusedDirectivesWarning,
brokenAnchorsWarning,
].filter((plugin): plugin is MDXPlugin => Boolean(plugin));

// codeCompatPlugin needs to be applied last after user-provided plugins
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`directives remark plugin - client compiler default behavior for hello file: console 1`] = `
[
[
"[WARNING] Docusaurus found 3 broken anchor in file "packages/docusaurus-mdx-loader/src/remark/brokenAnchors/__tests__/__fixtures__/anchors.md"
- #custom-id
- #notvalidinheading
- #notvalid",
],
]
`;

exports[`directives remark plugin - client compiler default behavior for hello file: result 1`] = `
"<h1><a href="#found">Hello</a></h1>
<h2><a href="#hello">found</a></h2>
<h2><a href="#title-with-space">title link with space</a></h2>
<p><a href="#hello">Found</a></p>
<h3>title with space</h3>
<p><a href="#title-with-space">title with space</a></p>
<h3>title</h3>
<p><a href="#title">title</a></p>
<p><a href="remoteAnchors.md#world">World found</a></p>
<p><a href="remoteAnchors.md">world</a></p>
<p><a href="./subfolder/remote.md#test">test</a></p>
<h2>test {#custom-id}</h2>
<p><a href="#custom-id">custom-id</a></p>
<h3><a href="#notvalidinheading">Not found</a></h3>
<p><a href="#notvalid">Not found</a></p>
<p><a href="remoteAnchors.md#worldnotvalid">World not found</a></p>
<p><a href="https://google.com">google</a></p>
<p><a href="#emptylink"></a></p>
<h6></h6>"
`;

exports[`directives remark plugin - client compiler default behavior for world file: console 1`] = `[]`;

exports[`directives remark plugin - client compiler default behavior for world file: result 1`] = `"<h1>World</h1>"`;
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

import path from 'path';
import remark2rehype from 'remark-rehype';
import stringify from 'rehype-stringify';
import vfile from 'to-vfile';
import plugin from '../index';
import admonition from '../../admonitions';
import type {WebpackCompilerName} from '@docusaurus/utils';

/**
 * Runs a Markdown fixture through a remark pipeline made of the admonition
 * plugin, the plugin under test, remark-rehype and rehype-stringify.
 *
 * @param name - Fixture base name (without `.md`) inside `__fixtures__`.
 * @param options - `compilerName` is stored on `file.data.compilerName`
 * before the file is processed.
 * @returns The `value` of the processed file.
 */
const processFixture = async (
  name: string,
  {compilerName}: {compilerName: WebpackCompilerName},
) => {
  // remark is loaded through a dynamic import
  const {remark} = await import('remark');

  const fixturePath = path.join(__dirname, '__fixtures__', `${name}.md`);
  const fixture = await vfile.read(fixturePath);
  fixture.data.compilerName = compilerName;

  const pipeline = remark()
    .use(admonition)
    .use(plugin)
    .use(remark2rehype)
    .use(stringify);

  const {value} = await pipeline.process(fixture);
  return value;
};

describe('directives remark plugin - client compiler', () => {
  // console.warn is replaced by a no-op spy so its calls can be asserted on;
  // recorded calls are cleared before each test.
  const warnSpy = jest.spyOn(console, 'warn').mockImplementation(() => {});
  beforeEach(() => jest.clearAllMocks());

  const clientOptions = {compilerName: 'client'} as const;

  it('default behavior for hello file', async () => {
    const output = await processFixture('anchors', clientOptions);
    expect(output).toMatchSnapshot('result');
    // a single warning is expected for the whole file
    expect(warnSpy).toHaveBeenCalledTimes(1);
    expect(warnSpy.mock.calls).toMatchSnapshot('console');
  });

  it('default behavior for world file', async () => {
    const output = await processFixture('remoteAnchors', clientOptions);
    expect(output).toMatchSnapshot('result');
    // no warning is expected for this fixture
    expect(warnSpy).toHaveBeenCalledTimes(0);
    expect(warnSpy.mock.calls).toMatchSnapshot('console');
  });
});
193 changes: 193 additions & 0 deletions packages/docusaurus-mdx-loader/src/remark/brokenAnchors/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import path from 'path';
import process from 'process';
import logger from '@docusaurus/logger';
import {posixPath} from '@docusaurus/utils';
import type {Heading, Link, Text} from 'mdast';
// @ts-expect-error: TODO see https://github.com/microsoft/TypeScript/issues/49721
import type {Transformer, Processor, Parent} from 'unified';

// TODO as of April 2023, no way to import/re-export this ESM type easily :/
// This might change soon, likely after TS 5.2
// See https://github.com/microsoft/TypeScript/issues/49721#issuecomment-1517839391
// import type {Plugin} from 'unified';
type Plugin = any; // TODO fix this asap

// Node types handed to the tree visitor as its test.
const nodeTypes: ['heading', 'link'] = ['heading', 'link'];

// Node type names compared against `node.type` in this file.
const nodeType = {
  heading: 'heading',
  link: 'link',
  text: 'text',
} as const;

// Derived from the constant above so the two cannot drift apart.
type NodeType = typeof nodeType;
Comment on lines +23 to +33
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, this TypeScript is roughly equivalent and simpler:

const nodeType = {
  heading: 'heading',
  link: 'link',
  text: 'text',
} as const;

type NodeType = typeof nodeType;


// A link collected from the document, split around its first '#'.
type LinkAnchor = {
link: {
// part of the link URL before '#'; undefined when that part is empty
url: string | undefined;
// part of the link URL after '#'; undefined when missing or empty
anchor: string | undefined;
};
};

// A heading collected from the document.
type HeadingAnchor = {
heading: {
// text of the heading's first child with spaces replaced by dashes
text: string;
};
};

// All links and headings collected while visiting one file.
type AnchorList = {
links: LinkAnchor[];
headings: HeadingAnchor[];
};

// Collected anchors together with the path of the file they came from.
type NodeAnchorList = {
nodes: AnchorList;
filePath: string;
};

/**
 * Protocols that make `isValidURL` return `true`. Built once instead of on
 * every call.
 */
const externalProtocols: ReadonlySet<string> = new Set([
  'http:',
  'https:',
  'ftp:',
  'ftps:',
  'mailto:',
  'tel:',
  'sms:',
]);

/**
 * Tells whether `url` is an absolute URL that uses one of the well-known
 * external protocols (`http:`, `https:`, `ftp:`, `ftps:`, `mailto:`, `tel:`,
 * `sms:`).
 *
 * Despite its name, this does not validate the URL in general: any other
 * protocol (including custom ones such as `docs:` or `file:`) and anything
 * that cannot be parsed as an absolute URL yields `false`.
 *
 * @param url - Link target to inspect.
 * @returns `true` only for a parsable URL with an external protocol.
 *
 * @example
 * isValidURL('http://www.example.com'); // true
 * isValidURL('docs:///path/to/file.txt'); // false (custom protocol)
 * isValidURL('file.md#anchor'); // false (not an absolute URL)
 * isValidURL(''); // false
 */
function isValidURL(url: string): boolean {
  try {
    return externalProtocols.has(new URL(url).protocol);
  } catch {
    // `new URL` throws on input it cannot parse as an absolute URL
    // (e.g. empty string, relative path, bare "#anchor")
    return false;
  }
}

/**
 * Logs a single warning listing the anchors of URL-less links (pure
 * `#anchor` links) that match none of the collected headings. Nothing is
 * logged when every such anchor matches.
 *
 * @param nodeAnchor - Collected links/headings and the path of their file.
 */
function logBrokenAnchorWarning(nodeAnchor: NodeAnchorList) {
  const {
    filePath,
    nodes: {links, headings},
  } = nodeAnchor;

  // Matching is case-insensitive: both sides are lower-cased.
  const knownAnchors = headings.map(({heading}) => heading.text.toLowerCase());

  const invalidAnchors: (string | undefined)[] = [];
  for (const {link} of links) {
    const isLocal = link.url === undefined;
    if (
      isLocal &&
      link.anchor &&
      !knownAnchors.includes(link.anchor.toLowerCase())
    ) {
      invalidAnchors.push(link.anchor);
    }
  }

  if (invalidAnchors.length === 0) {
    return;
  }

  const numInvalidAnchors = logger.interpolate`number=${invalidAnchors.length}`;
  // The path is reported relative to the current working directory, in
  // posix form.
  const customPath = posixPath(path.relative(process.cwd(), filePath));
  const fileLog = logger.interpolate`path=${customPath}`;
  const invalidAnchorList = invalidAnchors.map(formatAnchorMessage).join('\n');
  logger.warn`Docusaurus found ${numInvalidAnchors} broken anchor in file ${fileLog}
${invalidAnchorList}`;
}

/**
 * Formats one anchor as a bullet line of the warning message.
 *
 * @param str - Anchor name without the leading `#`; `undefined` is rendered
 * as the text "undefined".
 * @returns The anchor prefixed with `- #`.
 */
function formatAnchorMessage(str: string | undefined) {
  const bulletPrefix = '- #';
  return bulletPrefix + String(str);
}

/**
 * Turns heading text into the anchor form used for comparison by replacing
 * every space character with a dash. No other character is altered.
 *
 * @param string - Heading text.
 * @returns The text with each `' '` replaced by `'-'`.
 */
function stringToAnchor(string: string) {
  return string.split(' ').join('-');
}

/**
 * Remark plugin that collects the headings and links of a file and, for the
 * client compiler only, warns about same-file anchor links that match no
 * collected heading. The tree itself is not modified.
 */
const plugin: Plugin = function plugin(this: Processor): Transformer {
return async (tree, file) => {
// unist-util-visit is loaded through a dynamic import
const {visit} = await import('unist-util-visit');

const anchorList: AnchorList = {links: [], headings: []};
visit<Parent, ['heading', 'link']>(
tree,
nodeTypes,
(directive: Heading | Link) => {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

headings and links are not directives (:x ::xy or :::xyz), they are just headings and links, or nodes

// Skip links for which isValidURL returns true (external protocols such
// as http:, https:, ftp:, mailto:, ...) and any heading or link that has
// no children. Links with custom protocols (eg. docs:, file:) and plain
// paths (eg. file.md) are kept.
if (
(directive.type === 'link' && isValidURL(directive?.url)) ||
directive.children.length === 0
) {
return;
}

// Only the first child of a heading is considered when building its anchor.
if (directive.type === nodeType.heading) {
const childNode = directive.children[0] as Link | Text;

// if the heading is a link, then use the link's first child as the text
// NOTE(review): linkTextValue may be undefined (link without children) or
// lack a `value` (non-text child); stringToAnchor would then be called
// with undefined - confirm whether this can happen and guard if so.
if (childNode?.type === nodeType.link) {
const linkTextValue = childNode.children[0] as any;
anchorList.headings.push({
heading: {text: stringToAnchor(linkTextValue.value)},
});
} else if (childNode?.type === nodeType.text) {
anchorList.headings.push({
heading: {
text: stringToAnchor(childNode.value),
},
});
}
}

// links without children were already skipped above
// ? should we report empty links?
if (directive.type === nodeType.link) {
// Only the first two '#'-separated segments are used; an empty segment
// is stored as undefined.
const [url, anchor] = directive.url.split('#');
anchorList.links.push({
link: {
url: url === '' ? undefined : url,
anchor: anchor === '' ? undefined : anchor,
},
});
}
},
);

// We only enable these warnings for the client compiler
// This avoids emitting duplicate warnings in prod mode
// Note: the client compiler is used in both dev/prod modes
if (file.data.compilerName === 'client') {
logBrokenAnchorWarning({
nodes: anchorList,
filePath: file.path,
});
}
};
};

export default plugin;
Loading