/*
 * Copyright 2023 Adobe. All rights reserved.
 * This file is licensed to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under
 * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
 * OF ANY KIND, either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */

/**
 * Apply DOM operations to the provided document and return
 * the root element to be then transformed to Markdown.
 *
 * Removes every `noscript` element from the body, locates the
 * `.contributorBlock` element and, when it contains a `.name > p` paragraph,
 * replaces that paragraph with an `h3` carrying the same inner HTML.
 *
 * @param {HTMLDocument} document The document
 * @param {string} url The url of the page imported
 * @param {string} html The raw html (the document is cleaned up during preprocessing)
 * @param {object} params Object containing some parameters given by the import process.
 * @returns {HTMLElement} The root element to be transformed (the `.contributorBlock` element)
 * @throws {Error} If the document has no `.contributorBlock` element
 */
const transformDOM = ({
  // eslint-disable-next-line no-unused-vars
  document, url, html, params,
}) => {
  const main = document.body;
  WebImporter.DOMUtils.remove(main, [
    'noscript',
  ]);

  const contributor = main.querySelector('.contributorBlock');
  if (!contributor) {
    // Fail with a message that names the page instead of an opaque
    // "cannot read properties of null" TypeError on the next line.
    throw new Error(`No .contributorBlock element found in ${url}`);
  }

  // upgrade title to h3 tag
  const title = contributor.querySelector('.name > p');
  if (title) {
    const h3 = document.createElement('h3');
    h3.innerHTML = title.innerHTML;
    title.replaceWith(h3);
  }
  // A block without a name paragraph is still returned as-is: there is simply
  // nothing to upgrade.
  return contributor;
};

/**
 * Return a path that describes the document being transformed (file name, nesting...).
 * The path is then used to create the corresponding Word document.
 *
 * The path is the pathname of `url` with a trailing `.html` extension removed,
 * then a trailing slash removed, passed through
 * `WebImporter.FileUtils.sanitizePath`.
 *
 * @param {HTMLDocument} document The document
 * @param {string} url The url of the page imported
 * @param {string} html The raw html (the document is cleaned up during preprocessing)
 * @param {object} params Object containing some parameters given by the import process.
 * @return {string} The path
 */
const generateDocumentPath = ({
  // eslint-disable-next-line no-unused-vars
  document, url, html, params,
}) => {
  const { pathname } = new URL(url);
  const trimmed = pathname.replace(/\.html$/, '').replace(/\/$/, '');
  return WebImporter.FileUtils.sanitizePath(trimmed);
};

export default {
  transformDOM,
  generateDocumentPath,
};