diff --git a/packages/unified-latex-to-pretext/libs/pre-conversion-subs/break-on-boundaries.ts b/packages/unified-latex-to-pretext/libs/pre-conversion-subs/break-on-boundaries.ts
new file mode 100644
index 00000000..83cf2b38
--- /dev/null
+++ b/packages/unified-latex-to-pretext/libs/pre-conversion-subs/break-on-boundaries.ts
@@ -0,0 +1,179 @@
+import { env, arg } from "@unified-latex/unified-latex-builder";
+import * as Ast from "@unified-latex/unified-latex-types";
+import { getNamedArgsContent } from "@unified-latex/unified-latex-util-arguments";
+import {
+    anyEnvironment,
+    anyMacro,
+    match,
+} from "@unified-latex/unified-latex-util-match";
+import { replaceNode } from "@unified-latex/unified-latex-util-replace";
+import {
+    splitOnMacro,
+    unsplitOnMacro,
+} from "@unified-latex/unified-latex-util-split";
+import { visit } from "@unified-latex/unified-latex-util-visit";
+import { VFileMessage } from "vfile-message";
+
+/**
+ * All the divisions, where each item is {division macro, mapped environment}.
+ * Note that this is ordered from the "largest" division to the "smallest" division;
+ * `breakUp` walks this list in order, one entry per recursion level.
+ */
+const divisions: { division: string; mappedEnviron: string }[] = [
+    { division: "part", mappedEnviron: "_part" },
+    { division: "chapter", mappedEnviron: "_chapter" },
+    { division: "section", mappedEnviron: "_section" },
+    { division: "subsection", mappedEnviron: "_subsection" },
+    { division: "subsubsection", mappedEnviron: "_subsubsection" },
+    { division: "paragraph", mappedEnviron: "_paragraph" },
+    { division: "subparagraph", mappedEnviron: "_subparagraph" },
+];
+
+// check if a macro is a division macro
+const isDivisionMacro = match.createMacroMatcher(
+    divisions.map((x) => x.division)
+);
+
+// check if an environment is a newly created environment
+const isMappedEnviron = match.createEnvironmentMatcher(
+    divisions.map((x) => x.mappedEnviron)
+);
+
+/**
+ * Breaks up division macros into environments. The tree is modified in place:
+ * the content after each division macro is wrapped in the matching mapped
+ * environment, and the division macros themselves are removed.
+ *
+ * Groups with a division macro as an immediate child are replaced by their
+ * content first; one warning is recorded for each group removed this way.
+ *
+ * @param ast - The tree to rewrite.
+ * @returns An object of warning messages for any groups that were removed.
+ */
+export function breakOnBoundaries(ast: Ast.Ast): { messages: VFileMessage[] } {
+    // messages for any groups removed
+    const messagesLst: { messages: VFileMessage[] } = { messages: [] };
+
+    replaceNode(ast, (node) => {
+        if (match.group(node)) {
+            // remove if it contains a division as an immediate child
+            if (
+                node.content.some((child) => {
+                    return anyMacro(child) && isDivisionMacro(child);
+                })
+            ) {
+                const message = new VFileMessage(
+                    "Warning: hoisted out of a group, which might break the LaTeX code."
+                );
+
+                // add the position of the group if available
+                if (node.position) {
+                    message.line = node.position.start.line;
+                    message.column = node.position.start.column;
+                    message.position = {
+                        start: {
+                            line: node.position.start.line,
+                            column: node.position.start.column,
+                        },
+                        end: {
+                            line: node.position.end.line,
+                            column: node.position.end.column,
+                        },
+                    };
+                }
+
+                message.source = "LatexConversion";
+                messagesLst.messages.push(message);
+
+                return node.content;
+            }
+        }
+    });
+
+    visit(ast, (node, info) => {
+        // needs to be an environment, root, or group node
+        if (
+            !(
+                anyEnvironment(node) ||
+                node.type === "root" ||
+                match.group(node)
+            ) ||
+            // skip math mode
+            info.context.hasMathModeAncestor
+        ) {
+            return;
+        }
+        // if it's an environment, make sure it isn't a newly created one
+        else if (anyEnvironment(node) && isMappedEnviron(node)) {
+            return;
+        }
+
+        // now break up the divisions, starting at part
+        node.content = breakUp(node.content, 0);
+    });
+
+    replaceNode(ast, (node) => {
+        // remove all old division nodes
+        if (anyMacro(node) && isDivisionMacro(node)) {
+            return null;
+        }
+    });
+
+    return messagesLst;
+}
+
+/**
+ * Recursively breaks up the AST at the division macros.
+ *
+ * @param content - The nodes to split.
+ * @param depth - Index into `divisions` of the division to split on at this level.
+ * @returns The rebuilt nodes; returned unchanged once `depth` is past the last division.
+ */
+function breakUp(content: Ast.Node[], depth: number): Ast.Node[] {
+    // stop once every division has been handled; comparing against the table
+    // length keeps this guard in step with `divisions`
+    if (depth >= divisions.length) {
+        return content;
+    }
+
+    const splits = splitOnMacro(content, divisions[depth].division);
+
+    // go through each segment to recursively break
+    for (let i = 0; i < splits.segments.length; i++) {
+        splits.segments[i] = breakUp(splits.segments[i], depth + 1);
+    }
+
+    createEnvironments(splits, divisions[depth].mappedEnviron);
+
+    // rebuild this part of the AST
+    return unsplitOnMacro(splits);
+}
+
+/**
+ * Create the new environments that replace the division macros.
+ * Every segment except the first is replaced, in place, by a single environment
+ * wrapping it; the first segment is left as it is.
+ *
+ * @param splits - The segments and division macros of a split; segment `i` is
+ * paired with macro `i - 1`.
+ * @param newEnviron - The name of the environment to create.
+ */
+function createEnvironments(
+    splits: { segments: Ast.Node[][]; macros: Ast.Macro[] },
+    newEnviron: string
+): void {
+    // loop through segments (skipping first segment)
+    for (let i = 1; i < splits.segments.length; i++) {
+        // get the title
+        const title = getNamedArgsContent(splits.macros[i - 1])["title"];
+        const titleArg: Ast.Argument[] = [];
+
+        // create title argument
+        if (title) {
+            titleArg.push(arg(title, { braces: "[]" }));
+        }
+
+        // wrap segment with a new environment
+        splits.segments[i] = [env(newEnviron, splits.segments[i], titleArg)];
+    }
+}
diff --git a/packages/unified-latex-to-pretext/tests/break-on-boundaries.test.ts b/packages/unified-latex-to-pretext/tests/break-on-boundaries.test.ts
new file mode 100644
index 00000000..69a16ae9
--- /dev/null
+++ b/packages/unified-latex-to-pretext/tests/break-on-boundaries.test.ts
@@ -0,0 +1,159 @@
+import { describe, it, expect } from "vitest";
+import util from "util";
+import { getParser } from "@unified-latex/unified-latex-util-parse";
+import { printRaw } from "@unified-latex/unified-latex-util-print-raw";
+import { breakOnBoundaries } from "../libs/pre-conversion-subs/break-on-boundaries";
+
+// Make console.log pretty-print by default
+const origLog = console.log;
+console.log = (...args) => {
+    origLog(...args.map((x) => util.inspect(x, false, 10, true)));
+};
+
+describe("unified-latex-to-pretext:break-on-boundaries", () => {
+    // Every case parses a LaTeX snippet, runs breakOnBoundaries on the tree (which
+    // rewrites it in place), then checks the warnings and the re-printed source.
+    it("can break on parts", () => {
+        // two \part macros directly at the root
+        const source = String.raw`\part{Foo}Hi, this is a part\part{Bar}This is another part`;
+        const tree = getParser().parse(source);
+
+        const warnings = breakOnBoundaries(tree);
+
+        expect(warnings).toEqual({ messages: [] });
+        expect(printRaw(tree)).toEqual(
+            String.raw`\begin{_part}[Foo]Hi, this is a part\end{_part}\begin{_part}[Bar]This is another part\end{_part}`
+        );
+    });
+
+    it("can break on a combination of divisions", () => {
+        // mixed levels: the chapter closes the first section but stays inside the part
+        const source = String.raw`\part{part1}\section{Section1}Hi, this is a section\chapter{chap1}This is a chapter\section{Subsection2}`;
+        const tree = getParser().parse(source);
+
+        const warnings = breakOnBoundaries(tree);
+
+        expect(warnings).toEqual({ messages: [] });
+        expect(printRaw(tree)).toEqual(
+            "" +
+                String.raw`\begin{_part}[part1]` +
+                String.raw`\begin{_section}[Section1]Hi, this is a section\end{_section}` +
+                String.raw`\begin{_chapter}[chap1]This is a chapter` +
+                String.raw`\begin{_section}[Subsection2]\end{_section}\end{_chapter}\end{_part}`
+        );
+    });
+
+    it("can break on divisions wrapped around by a document environment", () => {
+        // the new environments stay inside the surrounding document environment
+        const source = String.raw`\begin{document}\section{Baz}Hi, this is a subsection\subsubsection{Foo}description.\end{document}`;
+        const tree = getParser().parse(source);
+
+        const warnings = breakOnBoundaries(tree);
+
+        expect(warnings).toEqual({ messages: [] });
+        expect(printRaw(tree)).toEqual(
+            String.raw`\begin{document}\begin{_section}[Baz]Hi, this is a subsection` +
+                String.raw`\begin{_subsubsection}[Foo]description.\end{_subsubsection}` +
+                String.raw`\end{_section}\end{document}`
+        );
+    });
+
+    it("can break on divisions wrapped around by different environments", () => {
+        // a division inside an inner environment is wrapped within that environment
+        const source =
+            String.raw`\begin{center}\part{name}Hi, this is a part\begin{environ}` +
+            String.raw`\subparagraph{title}description.\end{environ}\end{center}`;
+        const tree = getParser().parse(source);
+
+        const { messages } = breakOnBoundaries(tree);
+
+        expect(messages.length).toEqual(0);
+        expect(printRaw(tree)).toEqual(
+            String.raw`\begin{center}\begin{_part}[name]Hi, this is a part` +
+                String.raw`\begin{environ}\begin{_subparagraph}[title]description.` +
+                String.raw`\end{_subparagraph}\end{environ}\end{_part}\end{center}`
+        );
+    });
+
+    it("can break on divisions in a group", () => {
+        // the group around \paragraph is removed and reported once
+        const source =
+            String.raw`\begin{document}\chapter{Chap}` +
+            String.raw`{\paragraph{Intro}Introduction.\begin{center}\subparagraph{Conclusion}Conclusion.\end{center}}` +
+            String.raw`Chapter finished.\end{document}`;
+        const tree = getParser().parse(source);
+
+        const { messages } = breakOnBoundaries(tree);
+
+        expect(messages.length).toEqual(1);
+        expect(printRaw(tree)).toEqual(
+            String.raw`\begin{document}\begin{_chapter}[Chap]\begin{_paragraph}[Intro]Introduction.` +
+                String.raw`\begin{center}\begin{_subparagraph}[Conclusion]Conclusion.\end{_subparagraph}` +
+                String.raw`\end{center}Chapter finished.\end{_paragraph}\end{_chapter}\end{document}`
+        );
+    });
+
+    it("can break on divisions in nested groups", () => {
+        // both nested groups hold a division, so both are removed and reported
+        const source =
+            String.raw`\part{part1}{\subsection{Intro}description.` +
+            String.raw`\subsubsection{body}more text.{\subparagraph{Conclusion}Conclusion.}}`;
+        const tree = getParser().parse(source);
+
+        const { messages } = breakOnBoundaries(tree);
+
+        expect(messages.length).toEqual(2);
+        expect(printRaw(tree)).toEqual(
+            String.raw`\begin{_part}[part1]\begin{_subsection}[Intro]description.` +
+                String.raw`\begin{_subsubsection}[body]more text.\begin{_subparagraph}[Conclusion]Conclusion.` +
+                String.raw`\end{_subparagraph}\end{_subsubsection}\end{_subsection}\end{_part}`
+        );
+    });
+
+    it("doesn't break on groups without a division as an immediate child", () => {
+        // only the two groups with a division as a direct child are removed
+        const source =
+            String.raw`\part{part1}{\subsection{Intro}` +
+            String.raw`\subsubsection{body}{$\mathbb{N}$\subparagraph{Conclusion}{no divisions 1}Conclusion.}}{no divisions 2}`;
+        const tree = getParser().parse(source);
+
+        const { messages } = breakOnBoundaries(tree);
+
+        expect(messages.length).toEqual(2);
+        expect(printRaw(tree)).toEqual(
+            String.raw`\begin{_part}[part1]\begin{_subsection}[Intro]\begin{_subsubsection}[body]` +
+                String.raw`$\mathbb{N}$\begin{_subparagraph}[Conclusion]{no divisions 1}Conclusion.{no divisions 2}` +
+                String.raw`\end{_subparagraph}\end{_subsubsection}\end{_subsection}\end{_part}`
+        );
+    });
+
+    it("can break on divisions with latex in their titles", () => {
+        // math and escaped characters in a title are carried over unchanged
+        const source = String.raw`\chapter{$x = \frac{1}{2}$}Chapter 1\subsection{\"name\_1\" \$}This is subsection`;
+        const tree = getParser().parse(source);
+
+        const { messages } = breakOnBoundaries(tree);
+
+        expect(messages.length).toEqual(0);
+        expect(printRaw(tree)).toEqual(
+            String.raw`\begin{_chapter}[$x = \frac{1}{2}$]Chapter 1` +
+                String.raw`\begin{_subsection}[\"name\_1\" \$]This is subsection` +
+                String.raw`\end{_subsection}\end{_chapter}`
+        );
+    });
+
+    it("can break on divisions and trim whitespace around division beginnings and endings", () => {
+        // whitespace at the very start and very end of the input is not printed back
+        const source = String.raw` \subsubsection{first}subsection 1 \paragraph{body}This is paragraph `;
+        const tree = getParser().parse(source);
+
+        const { messages } = breakOnBoundaries(tree);
+
+        expect(messages.length).toEqual(0);
+        expect(printRaw(tree)).toEqual(
+            String.raw`\begin{_subsubsection}[first]subsection 1 ` +
+                String.raw`\begin{_paragraph}[body]This is paragraph` +
+                String.raw`\end{_paragraph}\end{_subsubsection}`
+        );
+    });
+});