From 13341496429b3c7fc74674a78192e99ac1f3ca3f Mon Sep 17 00:00:00 2001
From: isaac hershenson
Date: Wed, 28 Aug 2024 22:14:16 -0700
Subject: [PATCH] wip

---
 scripts/check-link.js | 94 +++++++++++++++----------------------------
 1 file changed, 32 insertions(+), 62 deletions(-)

diff --git a/scripts/check-link.js b/scripts/check-link.js
index f3312da1..b525704e 100644
--- a/scripts/check-link.js
+++ b/scripts/check-link.js
@@ -1,75 +1,45 @@
-const { exec } = require('child_process');
 const fs = require('fs');
 const path = require('path');
-const markdownLinkCheck = require('markdown-link-check');
-
-const ignoredUrls = [
-  'https://(api|web)\\.smith\\.langchain\\.com/.*',
-  'https://x\\.com/.*'
-];
-
-function convertNotebookToMarkdown(filePath) {
-  return new Promise((resolve, reject) => {
-    const outputPath = filePath.replace('.ipynb', '.md');
-    exec(`jupyter nbconvert --to markdown "${filePath}" --output "${outputPath}"`, (error) => {
-      if (error) {
-        reject(error);
-      } else {
-        resolve(outputPath);
-      }
-    });
-  });
+const { execSync } = require('child_process');
+
+// Function to find all .ipynb files in the given directory
+function findIpynbFiles(dir) {
+  let results = [];
+  const files = fs.readdirSync(dir);
+  for (const file of files) {
+    const filePath = path.join(dir, file);
+    const stat = fs.statSync(filePath);
+    if (stat.isDirectory()) {
+      results = results.concat(findIpynbFiles(filePath));
+    } else if (path.extname(file) === '.ipynb') {
+      results.push(filePath);
+    }
+  }
+  return results;
 }
 
-function checkLinks(filePath) {
-  return new Promise((resolve, reject) => {
-    const markdown = fs.readFileSync(filePath, 'utf8');
-    markdownLinkCheck(markdown, {
-      ignorePatterns: ignoredUrls,
-      baseUrl: 'https://github.com', // Adjust this if needed
-      httpHeaders: [
-        {
-          urls: ['https://github.com'],
-          headers: {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'
-          }
-        }
-      ]
-    }, (err, results) => {
-      if (err) {
-        reject(err);
-      } else {
-        const brokenLinks = results.filter(result => result.status === 'dead');
-        resolve(brokenLinks);
-      }
-    });
-  });
-}
+// Main function to check links
+function checkLinks() {
+  const ignorePatterns = [
+    'https://(api|web)\\.smith\\.langchain\\.com/.*',
+    'https://x\\.com/.*'
+  ];
 
-async function main() {
-  const changedFiles = process.env.CHANGED_FILES.split(' ');
-  const notebookFiles = changedFiles.filter(file => path.extname(file) === '.ipynb');
+  const ipynbFiles = findIpynbFiles('.');
+  console.log('Found .ipynb files:', ipynbFiles);
 
-  for (const file of notebookFiles) {
+  for (const file of ipynbFiles) {
+    console.log(`Checking links in ${file}`);
     try {
-      const markdownFile = await convertNotebookToMarkdown(file);
-      const brokenLinks = await checkLinks(markdownFile);
-
-      if (brokenLinks.length > 0) {
-        console.error(`Broken links found in ${file}:`);
-        brokenLinks.forEach(link => console.error(`- ${link.link}: ${link.statusCode}`));
-        process.exit(1);
+      execSync(`yarn run linkinator ${file} ${ignorePatterns.map(pattern => `--skip "${pattern}"`).join(' ')}`, { stdio: 'inherit' });
+    } catch (error) {
+      if (error.status === 5) {
+        console.log('Broken links found, but continuing...');
       } else {
-        console.log(`No broken links found in ${file}`);
+        throw error;
       }
-
-      // Clean up the temporary markdown file
-      fs.unlinkSync(markdownFile);
-    } catch (error) {
-      console.error(`Error processing ${file}:`, error);
-      process.exit(1);
     }
   }
 }
 
-main();
\ No newline at end of file
+checkLinks();
\ No newline at end of file