From 9d6f51c2314efd3870153f3156fd2f2fdee50d37 Mon Sep 17 00:00:00 2001 From: Chris Fenner Date: Thu, 5 Sep 2024 08:08:27 -0700 Subject: [PATCH] Add support for diff pdfs (#161) * Add support for diff pdfs This change adds new parameters: --diffpdf --diffbase --diffpdflog These parameters control the creation of a pdf diff-document against the provided ref, using latexdiff. * update invocation of --diff in action --- .github/workflows/render-samples.yml | 2 +- .gitignore | 2 + Dockerfile | 10 ++ build.sh | 135 +++++++++++++++++++++------ 4 files changed, 117 insertions(+), 32 deletions(-) diff --git a/.github/workflows/render-samples.yml b/.github/workflows/render-samples.yml index 2ebc72f..962f9aa 100644 --- a/.github/workflows/render-samples.yml +++ b/.github/workflows/render-samples.yml @@ -76,7 +76,7 @@ jobs: uses: trustedcomputinggroup/markdown@latest with: input-md: guide.tcg - extra-build-options: "--versioned_filenames --pr_number=${{ github.event.number }} --pr_repo=${{ github.repository }} --diff=${{ github.event.pull_request.base.sha }}" + extra-build-options: "--versioned_filenames --pr_number=${{ github.event.number }} --pr_repo=${{ github.repository }} --diffbase=${{ github.event.pull_request.base.sha }}" output-pdf: guide.pdf output-tex: guide.tex output-docx: guide.docx diff --git a/.gitignore b/.gitignore index 5eedcb5..8dc1f8a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ *.lot *.toc *.fdb_latexmk +*.upa +*.upb diff --git a/Dockerfile b/Dockerfile index 58d18ea..e85d634 100644 --- a/Dockerfile +++ b/Dockerfile @@ -151,6 +151,8 @@ ENV PATH="/usr/local/texlive/bin/aarch64-linux:/usr/local/texlive/bin/x86_64-lin # Packages that are needed despite not being used explicitly by the template: # bigfoot, catchfile, fancyvrb, footmisc, hardwrap, lineno, ltablex, latexmk, needspace, pgf, zref +# Package dependencies introduced by latexdiff: +# changebar, datetime2, latexdiff, listings, marginnote, pdfcomment, soulpos, ulem RUN tlmgr update --self 
&& tlmgr install \ accsupp \ adjustbox \ @@ -165,6 +167,8 @@ RUN tlmgr update --self && tlmgr install \ booktabs \ caption \ catchfile \ + changebar \ + datetime2 \ draftwatermark \ enumitem \ etoolbox \ @@ -181,14 +185,17 @@ RUN tlmgr update --self && tlmgr install \ hyperref \ hyphenat \ koma-script \ + latexdiff \ latexmk \ lineno \ + listings \ ltablex \ lualatex-math \ luatex \ luatex85 \ luatexbase \ makecell \ + marginnote \ mathtools \ mdframed \ microtype \ @@ -196,14 +203,17 @@ RUN tlmgr update --self && tlmgr install \ needspace \ newunicodechar \ pagecolor \ + pdfcomment \ pdflscape \ pgf \ polyglossia \ ragged2e \ selnolig \ setspace \ + soulpos \ textpos \ titling \ + ulem \ unicode-math \ upquote \ varwidth \ diff --git a/build.sh b/build.sh index ecf55df..1ec3a1f 100755 --- a/build.sh +++ b/build.sh @@ -4,6 +4,7 @@ RESOURCE_DIR="/" #default to root of pandoc container buildout DO_GITVERSION="yes" DO_GITSTATUS="yes" PDF_OUTPUT="" +DIFFPDF_OUTPUT="" DOCX_OUTPUT="" HTML_OUTPUT="" LATEX_OUTPUT="" @@ -41,7 +42,9 @@ print_usage() { echo " --latex=output: enable output of latex and specify the output file name." echo " --html=output: enable output of html and specify the output file name." echo " --pdflog=output: enable logging of pdf engine and specify the output file name." - echo " --diff=commit: create diff documents against the provided commit" + echo " --diffpdf=output: enable output of pdf diff and specify the output file name (requires --diffbase)" + echo " --diffbase=ref: create diff documents against the provided commit (no effect if --diffpdf is not provided)" + echo " --diffpdflog=output: enable logging of pdf engine during diffing and specify the output file name." echo echo "Miscellaneous" echo " --resourcedir=dir: Set the resource directory, defaults to root for pandoc containers" @@ -57,7 +60,7 @@ print_usage() { } -if !
options=$(getopt --longoptions=help,puppeteer,gitversion,gitstatus,nogitversion,table_rules,plain_quotes,versioned_filenames,pr_number:,pr_repo:,diff:,pdf:,latex:,pdflog:,pdf_engine:,docx:,html:,resourcedir: --options="" -- "$@"); then +if ! options=$(getopt --longoptions=help,puppeteer,gitversion,gitstatus,nogitversion,table_rules,plain_quotes,versioned_filenames,pr_number:,pr_repo:,diffbase:,pdf:,diffpdf:,diffpdflog:,latex:,pdflog:,pdf_engine:,docx:,html:,resourcedir: --options="" -- "$@"); then echo "Incorrect options provided" print_usage exit 1 @@ -66,7 +69,7 @@ fi eval set -- "${options}" while true; do case "$1" in - --diff) + --diffbase) DIFFBASE="${2}" shift 2 ;; @@ -115,6 +118,14 @@ while true; do PDF_OUTPUT="${2}" shift 2 ;; + --diffpdf) + DIFFPDF_OUTPUT="${2}" + shift 2 + ;; + --diffpdflog) + DIFFPDFLOG_OUTPUT="${2}" + shift 2 + ;; --html) HTML_OUTPUT="${2}" shift 2 @@ -157,6 +168,7 @@ readonly PR_REPO readonly DIFFBASE readonly PDF_ENGINE readonly PDFLOG_OUTPUT +readonly DIFFPDFLOG_OUTPUT shift "$(( OPTIND - 1 ))" @@ -198,6 +210,25 @@ mkdir -p "${BUILD_DIR}" cp -r . "${BUILD_DIR}" cd "${BUILD_DIR}" +# Let git work +git config --global --add safe.directory "${BUILD_DIR}" + +# make sure the diff arguments make sense +if [ -n "${DIFFPDF_OUTPUT}" ]; then + # --diffbase must be provided, and it must make sense to Git + if [ -z "${DIFFBASE}" ]; then + >&2 echo "--diffpdf was provided, but --diffbase was not." + print_usage + exit 1 + fi + git rev-parse --verify "${DIFFBASE}" > /dev/null 2>&1 + if [ $? -ne 0 ]; then + >&2 echo "--diffbase was provided, but it was not a valid Git commit, tag, or branch name." + print_usage + exit 1 + fi +fi + # Get the default browser if ! browser=$(command -v "chromium-browser"); then if ! browser=$(command -v "chromium"); then @@ -210,8 +241,6 @@ fi # figure out git version and revision if needed.
EXTRA_PANDOC_OPTIONS="" if test "${DO_GITVERSION}" == "yes"; then - git config --global --add safe.directory /workspace - # TODO: Should we fail if dirty? raw_version="$(git describe --always --tags)" echo "Git version: ${raw_version}" @@ -348,6 +377,9 @@ if [ "${VERSIONED_FILENAMES}" == "yes" ]; then if [ ! -z "${PDF_OUTPUT}" ]; then PDF_OUTPUT=$(prefix_filename "${version_prefix}" "${PDF_OUTPUT}") fi + if [ ! -z "${DIFFPDF_OUTPUT}" ]; then + DIFFPDF_OUTPUT=$(prefix_filename "${DIFFBASE}_to_${version_prefix}" "${DIFFPDF_OUTPUT}") + fi if [ ! -z "${LATEX_OUTPUT}" ]; then LATEX_OUTPUT=$(prefix_filename "${version_prefix}" "${LATEX_OUTPUT}") fi @@ -356,6 +388,7 @@ if [ "${VERSIONED_FILENAMES}" == "yes" ]; then fi fi readonly PDF_OUTPUT +readonly DIFFPDF_OUTPUT readonly DOCX_OUTPUT readonly HTML_OUTPUT readonly LATEX_OUTPUT @@ -364,16 +397,15 @@ echo "Starting Build with" echo "file: ${INPUT_FILE}" echo "docx: ${DOCX_OUTPUT:-none}" echo "pdf: ${PDF_OUTPUT:-none} (engine: ${PDF_ENGINE})" +echo "diff pdf: ${DIFFPDF_OUTPUT:-none} (engine: ${PDF_ENGINE})" echo "latex: ${latex_ouput:-none}" echo "html: ${html_ouput:-none}" echo "resource dir: ${RESOURCE_DIR}" echo "build dir: ${BUILD_DIR}" echo "browser: ${browser}" echo "use git version: ${DO_GITVERSION}" -echo "use table rules: ${TABLE_RULES}" -echo "make block quotes Informative Text: ${BLOCK_QUOTES_ARE_INFORMATIVE_TEXT}" if [ ! -z "${DIFFBASE}" ]; then - echo "diff against: ${DIFFBASE}" + echo "diff against: ${DIFFBASE} ($(git rev-parse --verify ${DIFFBASE}))" fi if test "${DO_GITVERSION}" == "yes"; then echo "Git Generated Document Version Information" @@ -418,18 +450,27 @@ if [ "${BLOCK_QUOTES_ARE_INFORMATIVE_TEXT}" == "yes" ]; then EXTRA_PANDOC_OPTIONS+=" --lua-filter=informative-quote-blocks.lua" fi -# Hacks - -# \newpage is rendered as the string "\newpage" in GitHub markdown. -# Transform horizontal rules into \newpages. -# Exception: the YAML front matter of the document, so undo the instance on the first line. 
-# TODO: Turn this into a Pandoc filter. -sed -i.bak 's/^---$/\\newpage/g;1s/\\newpage/---/g' "${BUILD_DIR}/${INPUT_FILE}" - -# Transform sections before the table of contents into section*, which does not number them. -# While we're doing this, transform the case to all-caps. -# TODO: Turn this into a Pandoc filter. -sed -i.bak '0,/\\tableofcontents/s/^# \(.*\)/\\section*\{\U\1\}/g' "${BUILD_DIR}/${INPUT_FILE}" +# Use sed to perform some basic fixups on certain input files. +do_md_fixups() { + local input=$1 + # \newpage is rendered as the string "\newpage" in GitHub markdown. + # Transform horizontal rules into \newpages. + # Exception: the YAML front matter of the document, so undo the instance on the first line. + # TODO: Turn this into a Pandoc filter. + sed -i.bak 's/^---$/\\newpage/g;1s/\\newpage/---/g' "${input}" + + # Transform sections before the table of contents into section*, which does not number them. + # While we're doing this, transform the case to all-caps. + # TODO: Turn this into a Pandoc filter. + sed -i.bak '0,/\\tableofcontents/s/^# \(.*\)/\\section*\{\U\1\}/g' "${input}" +} +do_tex_fixups() { + local input=$1 + # We have a "code" environment that displays everything, including comments. + # Sometimes latexdiff injects comments that it thinks won't be displayed. + # Delete those latexdiff comments. + sed -i.bak 's/%DIFDELCMD.*//g' "${input}" +} if test "${DO_GITVERSION}" == "yes"; then # If using the git information for versioning, grab the date from there @@ -565,7 +606,7 @@ do_pdf() { local logfile=$3 # LaTeX engines choose this filename based on TEMP_TEX_FILE's basename. It also emits a bunch of other files. - readonly temp_pdf_file="$(basename ${input%.*}).pdf" + local temp_pdf_file="$(basename ${input%.*}).pdf" echo "Rendering PDF" local start=$(date +%s) @@ -580,16 +621,15 @@ do_pdf() { # Write any LaTeX errors to stderr. >&2 grep -A 5 "] !
" "${logfile}" - # Copy aux, lof, lot, and toc files back to the source directory so they can be cached and speed up future runs. - if [ -n "${PDFLOG_OUTPUT}" ]; then - cp "${logfile}" "${SOURCE_DIR}/${PDFLOG_OUTPUT}" - fi - cp *.aux "${SOURCE_DIR}" - cp *.lof "${SOURCE_DIR}" - cp *.lot "${SOURCE_DIR}" - cp *.toc "${SOURCE_DIR}" + # Copy aux, lof, lot, toc, upa, and upb files (if any) back to the source directory so they can be cached and speed up future runs. + cp *.aux "${SOURCE_DIR}" 2>/dev/null + cp *.lof "${SOURCE_DIR}" 2>/dev/null + cp *.lot "${SOURCE_DIR}" 2>/dev/null + cp *.toc "${SOURCE_DIR}" 2>/dev/null + cp *.upa "${SOURCE_DIR}" 2>/dev/null + cp *.upb "${SOURCE_DIR}" 2>/dev/null # Copy converted images so they can be cached as well. - cp *.convert.pdf "${SOURCE_DIR}" + cp *.convert.pdf "${SOURCE_DIR}" 2>/dev/null echo "Elapsed time: $(($end-$start)) seconds" # Write any LaTeX errors to stderr. >&2 grep -A 5 "! " "${logfile}" @@ -700,7 +740,8 @@ do_html() { # Generate .tex output if either latex or pdf formats were requested, because # the .tex is an intermediate requirement to the pdf. readonly TEMP_TEX_FILE="${BUILD_DIR}/${INPUT_FILE}.tex" -if [ -n "${PDF_OUTPUT}" -o -n "${LATEX_OUTPUT}" ]; then +if [ -n "${PDF_OUTPUT}" -o -n "${LATEX_OUTPUT}" -o -n "${DIFFPDF_OUTPUT}" ]; then + do_md_fixups "${BUILD_DIR}/${INPUT_FILE}" do_latex "${BUILD_DIR}/${INPUT_FILE}" "${TEMP_TEX_FILE}" fi if [ -n "${LATEX_OUTPUT}" ]; then @@ -711,6 +752,12 @@ fi readonly LATEX_LOG="${BUILD_DIR}/latex.log" if [ -n "${PDF_OUTPUT}" ]; then do_pdf "${TEMP_TEX_FILE}" "${SOURCE_DIR}/${PDF_OUTPUT}" "${LATEX_LOG}" + + # Copy the logs, if requested. 
+ if [ -n "${PDFLOG_OUTPUT}" ]; then + mkdir -p "$(dirname ${SOURCE_DIR}/${PDFLOG_OUTPUT})" + cp "${LATEX_LOG}" "${SOURCE_DIR}/${PDFLOG_OUTPUT}" + fi fi # Generate the docx output @@ -724,6 +771,32 @@ if [ -n "${HTML_OUTPUT}" ]; then do_html "${BUILD_DIR}/${INPUT_FILE}" "${SOURCE_DIR}/${HTML_OUTPUT}" fi +# Generate the diff output +# Do this last so we can do whatever we want to the build directory +readonly TEMP_DIFFBASE_TEX_FILE="${BUILD_DIR}/${INPUT_FILE}.diffbase.tex" +readonly TEMP_DIFF_TEX_FILE="${BUILD_DIR}/${INPUT_FILE}.diff.tex" +readonly TEMP_LATEXDIFF_LOG="${BUILD_DIR}/latexdiff.log" +export MERMAID_FILTER_FORMAT="pdf" +if [ -n "${DIFFPDF_OUTPUT}" ]; then + git reset --hard ${DIFFBASE} + + do_md_fixups "${BUILD_DIR}/${INPUT_FILE}" + do_latex "${BUILD_DIR}/${INPUT_FILE}" "${TEMP_DIFFBASE_TEX_FILE}" + latexdiff --type PDFCOMMENT --driver "${PDF_ENGINE}" "${TEMP_DIFFBASE_TEX_FILE}" "${TEMP_TEX_FILE}" > "${TEMP_DIFF_TEX_FILE}" 2>"${TEMP_LATEXDIFF_LOG}" + do_tex_fixups "${TEMP_DIFF_TEX_FILE}" + do_pdf "${TEMP_DIFF_TEX_FILE}" "${SOURCE_DIR}/${DIFFPDF_OUTPUT}" "${LATEX_LOG}" + + # Copy the logs, if requested. Note that this file gets the latexdiff and PDF driver output. + if [ -n "${DIFFPDFLOG_OUTPUT}" ]; then + mkdir -p "$(dirname ${SOURCE_DIR}/${DIFFPDFLOG_OUTPUT})" + echo "latexdiff output:" > "${SOURCE_DIR}/${DIFFPDFLOG_OUTPUT}" + cat "${TEMP_LATEXDIFF_LOG}" >> "${SOURCE_DIR}/${DIFFPDFLOG_OUTPUT}" + echo "" >> "${SOURCE_DIR}/${DIFFPDFLOG_OUTPUT}" + echo "${PDF_ENGINE} output:" >> "${SOURCE_DIR}/${DIFFPDFLOG_OUTPUT}" + cat "${LATEX_LOG}" >> "${SOURCE_DIR}/${DIFFPDFLOG_OUTPUT}" + fi +fi + if [ "${FAILED}" = "true" ]; then echo "Overall workflow failed" exit 1