From 5a4993b43a3c0741637870ef8b78ac2ed6278ba7 Mon Sep 17 00:00:00 2001
From: tmaret <tmaret@adobe.com>
Date: Wed, 13 Dec 2023 17:35:07 +0100
Subject: [PATCH] issue-33 - add script to import content authors

---
 tools/importer/import-author.js | 53 +++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 tools/importer/import-author.js

diff --git a/tools/importer/import-author.js b/tools/importer/import-author.js
new file mode 100644
index 00000000..b8d3054c
--- /dev/null
+++ b/tools/importer/import-author.js
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2023 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+export default {
+  /**
+   * Return the contributor block, with its name upgraded to an h3, as the root to transform.
+   * @param {HTMLDocument} document The document
+   * @param {string} url The url of the page imported
+   * @param {string} html The raw html (the document is cleaned up during preprocessing)
+   * @param {object} params Object containing some parameters given by the import process.
+   * @returns {HTMLElement} The `.contributorBlock` element to be transformed to Markdown
+   * @throws {Error} If the page contains no `.contributorBlock` element
+   */
+  transformDOM: ({ document, url }) => {
+    const main = document.body;
+    WebImporter.DOMUtils.remove(main, ['noscript']);
+    const contributor = main.querySelector('.contributorBlock');
+    if (!contributor) {
+      // name the offending page instead of failing with a TypeError on the next lookup
+      throw new Error(`No .contributorBlock element found in ${url}`);
+    }
+    // upgrade title to h3 tag; a block without a name paragraph is returned untouched
+    const title = contributor.querySelector('.name > p');
+    if (title) {
+      const h3 = document.createElement('h3');
+      h3.innerHTML = title.innerHTML;
+      title.replaceWith(h3);
+    }
+    return contributor;
+  },
+
+  /**
+   * Return a path that describes the document being transformed (file name, nesting...).
+   * The path is then used to create the corresponding Word document.
+   * @param {HTMLDocument} document The document
+   * @param {string} url The url of the page imported
+   * @param {string} html The raw html (the document is cleaned up during preprocessing)
+   * @param {object} params Object containing some parameters given by the import process.
+   * @return {string} The url pathname, without `.html` suffix or trailing slash, sanitized
+   */
+  generateDocumentPath: ({ url }) => WebImporter.FileUtils.sanitizePath(
+    new URL(url).pathname.replace(/\.html$/, '').replace(/\/$/, ''),
+  ),
+};