diff --git a/apicache/0c28bbdde90ce46c40c2adaf87f7339ec19a729dcb00249ff42f41f8dc1f8142 b/apicache/0c28bbdde90ce46c40c2adaf87f7339ec19a729dcb00249ff42f41f8dc1f8142
deleted file mode 100644
index 687e888..0000000
Binary files a/apicache/0c28bbdde90ce46c40c2adaf87f7339ec19a729dcb00249ff42f41f8dc1f8142 and /dev/null differ
diff --git a/apicache/3153446787d457631aa6ec636af843866c871f5f56b09283e0246b7b9b7addba b/apicache/3153446787d457631aa6ec636af843866c871f5f56b09283e0246b7b9b7addba
deleted file mode 100644
index 1eca06d..0000000
Binary files a/apicache/3153446787d457631aa6ec636af843866c871f5f56b09283e0246b7b9b7addba and /dev/null differ
diff --git a/apicache/51852b6e6c8ad5c02d87b0d5c643324a490c00b02cb5151c3423567416b6418a b/apicache/51852b6e6c8ad5c02d87b0d5c643324a490c00b02cb5151c3423567416b6418a
deleted file mode 100644
index 5507912..0000000
Binary files a/apicache/51852b6e6c8ad5c02d87b0d5c643324a490c00b02cb5151c3423567416b6418a and /dev/null differ
diff --git a/apicache/6c747ab03ef6057844368f3b049a1404d70481ded4bac6d14a1eb12f2c9177dd b/apicache/6c747ab03ef6057844368f3b049a1404d70481ded4bac6d14a1eb12f2c9177dd
deleted file mode 100644
index 015b047..0000000
Binary files a/apicache/6c747ab03ef6057844368f3b049a1404d70481ded4bac6d14a1eb12f2c9177dd and /dev/null differ
diff --git a/apicache/7f19f9d02421f697179c40f256cc2bc26874d465db53e709c6554e645592b0a1 b/apicache/7f19f9d02421f697179c40f256cc2bc26874d465db53e709c6554e645592b0a1
deleted file mode 100644
index 0cdb3b6..0000000
Binary files a/apicache/7f19f9d02421f697179c40f256cc2bc26874d465db53e709c6554e645592b0a1 and /dev/null differ
diff --git a/apicache/9f4738a9683ad8317eccf68a9f0ac99733ad245f8591f27775196ff85281262f b/apicache/9f4738a9683ad8317eccf68a9f0ac99733ad245f8591f27775196ff85281262f
deleted file mode 100644
index 9f79f13..0000000
Binary files a/apicache/9f4738a9683ad8317eccf68a9f0ac99733ad245f8591f27775196ff85281262f and /dev/null differ
diff --git a/apicache/c8cfa01cfa3d31d40333531ef714e97d7a0ded341ad1ee57a084cb1ede53f371 b/apicache/c8cfa01cfa3d31d40333531ef714e97d7a0ded341ad1ee57a084cb1ede53f371
deleted file mode 100644
index 7f360d3..0000000
Binary files a/apicache/c8cfa01cfa3d31d40333531ef714e97d7a0ded341ad1ee57a084cb1ede53f371 and /dev/null differ
diff --git a/apicache/e87d1f5aa0e9fc68dbb97d738cde91273e2ed3281fc390f8420535e7b0548f79 b/apicache/e87d1f5aa0e9fc68dbb97d738cde91273e2ed3281fc390f8420535e7b0548f79
deleted file mode 100644
index 6ad1442..0000000
Binary files a/apicache/e87d1f5aa0e9fc68dbb97d738cde91273e2ed3281fc390f8420535e7b0548f79 and /dev/null differ
diff --git a/app copy 2.js b/app copy 2.js
new file mode 100644
index 0000000..f893701
--- /dev/null
+++ b/app copy 2.js	
@@ -0,0 +1,139 @@
+// Backup from 10/25 before adding jobs table
+const jobslist = document.getElementById('jobslist'); // container whose innerHTML displayJobs overwrites
+const searchBar = document.getElementById('searchBar'); // element whose keyup events drive the filtering below
+const mainCategory = document.getElementById('mainCategory'); // shows the main_category of the first displayed job
+let jobData = []; // parsed jobs.json: a list of { main_category, jobs } records, assigned by loadJobs
+
+// Search box handler: on every key release, re-render the job list narrowed
+// to the space-separated terms typed so far (all jobs when the box is empty).
+searchBar.addEventListener('keyup', (e) => {
+    // Lower-case the query once so the comparisons below are case-insensitive.
+    const searchWords = e.target.value
+        .toLowerCase()
+        .split(' ')
+        .filter(word => word.length > 0);
+
+    // Flatten the categories into { main_category, jobTitle, job } records,
+    // keeping only the [jobTitle, job] entries accepted by `keep`.
+    const collectJobs = (keep) => jobData.flatMap(category =>
+        Object.entries(category.jobs)
+            .filter(keep)
+            .map(([jobTitle, job]) => ({
+                main_category: category.main_category,
+                jobTitle,
+                job
+            }))
+    );
+
+    if (searchWords.length === 0) {
+        displayJobs(collectJobs(() => true));
+        return;
+    }
+
+    // A job is kept when any term occurs in its title, or in the URL or
+    // category of any of its links (substring match).
+    displayJobs(collectJobs(([jobTitle, job]) => {
+        const titleHit = searchWords.some(word => jobTitle.toLowerCase().includes(word));
+        const linkHit = job.links.some(({ url, category }) =>
+            searchWords.some(word =>
+                url.toLowerCase().includes(word) || category.toLowerCase().includes(word)));
+
+        return titleHit || linkHit;
+    }));
+});
+
+// Fetch jobs.json, store the parsed categories in jobData and render every job.
+// fetch() resolves even for HTTP error statuses, so a failed response is rejected explicitly.
+const loadJobs = async () => {
+    try {
+        const res = await fetch('jobs.json'); // Adjust the path if necessary
+        if (!res.ok) throw new Error(`Failed to load jobs.json: HTTP ${res.status}`);
+        jobData = await res.json(); // Load all categories and jobs
+        displayJobs(jobData.flatMap(category =>
+            Object.entries(category.jobs).map(([jobTitle, job]) => ({
+                main_category: category.main_category, jobTitle, job
+            }))
+        ));
+    } catch (err) {
+        console.error(err); // failures are logged, not rethrown
+    }
+};
+
+const displayJobs = (jobs) => { // Render the given { main_category, jobTitle, job } records into #jobslist, replacing its content
+    let lastCategory = ''; // category of the previously rendered record; a heading is emitted whenever it changes
+    const htmlString = jobs.map(({ main_category, jobTitle, job }) => {
+        const isNewCategory = main_category !== lastCategory;
+        lastCategory = main_category;
+
+        const linksHtml = job.links.map(link => { // one <li> per link: category label plus the URL as a new-tab anchor
+            return `
+                <li class="link">
+                    <span class="category">${link.category}</span>
+                    <a href="${link.url}" target="_blank">${link.url}</a>
+                </li>
+            `;
+        }).join('');
+
+        const videosHtml = job.videos.map(video => { // one thumbnail per video; the player iframe is only created on click
+            const videoId = extractVideoId(video.url);
+            const thumbnailUrl = `https://img.youtube.com/vi/${videoId}/hqdefault.jpg`;
+
+            return `
+                <div class="video-wrapper" data-video-id="${videoId}" style="margin-bottom: 20px;">
+                    <img src="${thumbnailUrl}" class="video-thumbnail" alt="Video thumbnail" style="cursor: pointer;" />
+                </div>
+            `;
+        }).join('');
+
+        // Assemble the job section (category heading only when the category changed); the degree is shown in its own box
+        return `
+            ${isNewCategory ? `<h2 class="main-category">${main_category}</h2>` : ''}
+            <div class="job-section">
+                <h3 class="job-title">${jobTitle}</h3>
+                <span class="degree-box">Degree Required: ${job.degree_required}</span> <!-- Degree info in a separate box -->
+                <ul class="links-list">${linksHtml}</ul>
+                <div class="videos-container">${videosHtml}</div>
+            </div>
+        `;
+    }).join('');
+
+    jobslist.innerHTML = htmlString; // NOTE(review): values are interpolated unescaped; only safe while jobs.json is trusted
+    updateMainCategory(jobs);
+
+    // Clicking a thumbnail replaces it with an autoplaying YouTube embed for the ID stored on its wrapper
+    document.querySelectorAll('.video-thumbnail').forEach(thumbnail => {
+        thumbnail.addEventListener('click', function() {
+            const videoWrapper = this.closest('.video-wrapper'); // `this` is the clicked thumbnail (regular function, not an arrow)
+            const videoId = videoWrapper.dataset.videoId;
+
+            const iframe = document.createElement('iframe');
+            iframe.src = `https://www.youtube.com/embed/${videoId}?autoplay=1`;
+            iframe.setAttribute('frameborder', '0');
+            iframe.setAttribute('allow', 'accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture');
+            iframe.setAttribute('allowfullscreen', true);
+            iframe.setAttribute('loading', 'lazy');
+
+            videoWrapper.innerHTML = ''; // Clear the thumbnail
+            videoWrapper.appendChild(iframe);
+        });
+    });
+};
+
+// Mirror the category of the first listed job into the #mainCategory element.
+// `jobs` is the same flattened list handed to displayJobs; only its first
+// entry is inspected.
+const updateMainCategory = (jobs) => {
+    const leadingJob = jobs[0];
+    // An empty list leaves nothing to show, so the element is blanked instead.
+    const label = leadingJob ? leadingJob.main_category : '';
+    mainCategory.textContent = label;
+};
+
+// Extract the YouTube video ID: the `v` query parameter, else the last non-empty path segment ('' if neither exists).
+const extractVideoId = (url) => {
+    const parsed = new URL(url); // throws TypeError when `url` is not a valid absolute URL
+    return parsed.searchParams.get('v') || parsed.pathname.split('/').filter(Boolean).pop() || ''; // pathname excludes ?query/#hash, so youtu.be/ID?t=30 yields ID
+};
+
+// Start the initial fetch and render once the document has been parsed
+document.addEventListener('DOMContentLoaded', loadJobs);
diff --git a/app copy 3.js b/app copy 3.js
new file mode 100644
index 0000000..7019a21
--- /dev/null
+++ b/app copy 3.js	
@@ -0,0 +1,144 @@
+// 10/25/24 backup
+const jobslist = document.getElementById('jobslist'); // container whose innerHTML displayJobs overwrites
+const searchBar = document.getElementById('searchBar'); // element whose keyup events drive the filtering below
+const mainCategory = document.getElementById('mainCategory'); // shows the main_category of the first displayed job
+let jobData = []; // parsed jobs.json: a list of { main_category, jobs } records, assigned by loadJobs
+
+// Search box handler: on every key release, re-render the job list narrowed
+// to the space-separated terms typed so far (all jobs when the box is empty).
+searchBar.addEventListener('keyup', (e) => {
+    // Lower-case the query once so the comparisons below are case-insensitive.
+    const searchWords = e.target.value
+        .toLowerCase()
+        .split(' ')
+        .filter(word => word.length > 0);
+
+    // Flatten the categories into { main_category, jobTitle, job } records,
+    // keeping only the [jobTitle, job] entries accepted by `keep`.
+    const collectJobs = (keep) => jobData.flatMap(category =>
+        Object.entries(category.jobs)
+            .filter(keep)
+            .map(([jobTitle, job]) => ({
+                main_category: category.main_category,
+                jobTitle,
+                job
+            }))
+    );
+
+    if (searchWords.length === 0) {
+        displayJobs(collectJobs(() => true));
+        return;
+    }
+
+    // A job is kept when any term occurs in its title, or in the URL or
+    // category of any of its links (substring match).
+    displayJobs(collectJobs(([jobTitle, job]) => {
+        const titleHit = searchWords.some(word => jobTitle.toLowerCase().includes(word));
+        const linkHit = job.links.some(({ url, category }) =>
+            searchWords.some(word =>
+                url.toLowerCase().includes(word) || category.toLowerCase().includes(word)));
+
+        return titleHit || linkHit;
+    }));
+});
+
+// Fetch jobs.json, store the parsed categories in jobData and render every job.
+// fetch() resolves even for HTTP error statuses, so a failed response is rejected explicitly.
+const loadJobs = async () => {
+    try {
+        const res = await fetch('jobs.json'); // Adjust the path if necessary
+        if (!res.ok) throw new Error(`Failed to load jobs.json: HTTP ${res.status}`);
+        jobData = await res.json(); // Load all categories and jobs
+        displayJobs(jobData.flatMap(category =>
+            Object.entries(category.jobs).map(([jobTitle, job]) => ({
+                main_category: category.main_category, jobTitle, job
+            }))
+        ));
+    } catch (err) {
+        console.error(err); // failures are logged, not rethrown
+    }
+};
+
+const displayJobs = (jobs) => { // Render the given { main_category, jobTitle, job } records into #jobslist, replacing its content
+    let lastCategory = ''; // category of the previously rendered record; a heading is emitted whenever it changes
+    const htmlString = jobs.map(({ main_category, jobTitle, job }) => {
+        const isNewCategory = main_category !== lastCategory;
+        lastCategory = main_category;
+
+        const linksHtml = job.links.map(link => { // one <li> per link: category label plus the URL as a new-tab anchor
+            return `
+                <li class="link">
+                    <span class="category">${link.category}</span>
+                    <a href="${link.url}" target="_blank">${link.url}</a>
+                </li>
+            `;
+        }).join('');
+
+        const videosHtml = job.videos.map(video => { // one thumbnail per video; the player iframe is only created on click
+            const videoId = extractVideoId(video.url);
+            const thumbnailUrl = `https://img.youtube.com/vi/${videoId}/hqdefault.jpg`;
+
+            return `
+                <div class="video-wrapper" data-video-id="${videoId}" style="margin-bottom: 20px;">
+                    <img src="${thumbnailUrl}" class="video-thumbnail" alt="Video thumbnail" style="cursor: pointer;" />
+                </div>
+            `;
+        }).join('');
+
+        // Optional per-job table: job.jobs_table is inserted as raw HTML when present
+        const jobsTableHtml = job.jobs_table ? job.jobs_table : ''; // '' when the record has no (or an empty) jobs_table
+
+        // Assemble the job section (category heading only when the category changed); the degree is shown in its own box
+        return `
+            ${isNewCategory ? `<h2 class="main-category">${main_category}</h2>` : ''}
+            <div class="job-section">
+                <h3 class="job-title">${jobTitle}</h3>
+                <span class="degree-box">Degree Required: ${job.degree_required}</span>
+                <ul class="links-list">${linksHtml}</ul>
+                <div class="videos-container">${videosHtml}</div>
+                <!-- Add the job table if it exists -->
+                <div class="jobs-table">${jobsTableHtml}</div>
+            </div>
+        `;
+    }).join('');
+
+    jobslist.innerHTML = htmlString; // NOTE(review): values are interpolated unescaped; only safe while jobs.json is trusted
+    updateMainCategory(jobs);
+
+    // Clicking a thumbnail replaces it with an autoplaying YouTube embed for the ID stored on its wrapper
+    document.querySelectorAll('.video-thumbnail').forEach(thumbnail => {
+        thumbnail.addEventListener('click', function() {
+            const videoWrapper = this.closest('.video-wrapper'); // `this` is the clicked thumbnail (regular function, not an arrow)
+            const videoId = videoWrapper.dataset.videoId;
+
+            const iframe = document.createElement('iframe');
+            iframe.src = `https://www.youtube.com/embed/${videoId}?autoplay=1`;
+            iframe.setAttribute('frameborder', '0');
+            iframe.setAttribute('allow', 'accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture');
+            iframe.setAttribute('allowfullscreen', true);
+            iframe.setAttribute('loading', 'lazy');
+
+            videoWrapper.innerHTML = ''; // Clear the thumbnail
+            videoWrapper.appendChild(iframe);
+        });
+    });
+};
+
+// Mirror the category of the first listed job into the #mainCategory element.
+// `jobs` is the same flattened list handed to displayJobs; only its first
+// entry is inspected.
+const updateMainCategory = (jobs) => {
+    const leadingJob = jobs[0];
+    // An empty list leaves nothing to show, so the element is blanked instead.
+    const label = leadingJob ? leadingJob.main_category : '';
+    mainCategory.textContent = label;
+};
+
+// Extract the YouTube video ID: the `v` query parameter, else the last non-empty path segment ('' if neither exists).
+const extractVideoId = (url) => {
+    const parsed = new URL(url); // throws TypeError when `url` is not a valid absolute URL
+    return parsed.searchParams.get('v') || parsed.pathname.split('/').filter(Boolean).pop() || ''; // pathname excludes ?query/#hash, so youtu.be/ID?t=30 yields ID
+};
+
+// Start the initial fetch and render once the document has been parsed
+document.addEventListener('DOMContentLoaded', loadJobs);
diff --git a/app.css b/app.css
index 85a4018..505d6f5 100644
--- a/app.css
+++ b/app.css
@@ -326,3 +326,99 @@ h3 {
 .degree-box.green {
     background-color: #28a745; /* Green for Graduate degree */
 }
+
+
+/* Basic styling for the body */
+body {
+    font-family: Arial, sans-serif;
+    background-color: #f9f9f9;
+    margin: 20px;
+}
+
+/* Style for the table. NOTE: a second `table` rule later in this file overrides width and margin. */
+table {
+    width: 50%;  /* Left-half layout; superseded by the later rule's 60% */
+    /* border-collapse: collapse; */
+    margin: 20px 0;  /* superseded by the later rule's `20px auto` */
+    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
+}
+
+/* Style for table headers */
+th {
+    background-color: #4CAF50;
+    color: white;
+    padding: 10px;
+    text-align: left;  /* Align text in headers to the left */
+}
+
+/* Style for table cells */
+td {
+    padding: 8px;  /* Reduced padding for less whitespace */
+    /* border: 1px solid #ddd; */
+    text-align: left;  /* Left-align text in cells */
+    white-space: nowrap;  /* Prevent text wrapping */
+}
+
+/* Hover effect for table rows */
+tr:hover {
+    background-color: #ff000015;
+}
+
+/* Responsive table */
+@media (max-width: 600px) {
+    table {
+        width: 100%;
+        display: block;
+        overflow-x: auto;
+    }
+}
+
+
+/* Basic styling for the body */
+body {
+    font-family: Arial, sans-serif;
+    background-color: #f9f9f9;
+    margin: 20px;
+}
+
+/* Style for the table */
+table {
+    width: 60%;  /* Set table width to 60% to make it a bit wider */
+    border-collapse: collapse;
+    margin: 20px auto; /* Center-align the table */
+    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
+}
+
+/* Style for table headers */
+th {
+    background-color: #4CAF50;
+    color: white;
+    padding: 10px;
+    text-align: left;  /* Align text in headers to the left */
+}
+
+/* Style for table cells */
+td {
+    padding: 8px;  /* Reduced padding for less whitespace */
+    text-align: left;  /* Left-align text in cells */
+    white-space: nowrap;  /* Prevent text wrapping */
+}
+
+/* Wider first column */
+td:first-child, th:first-child {
+    width: 40%; /* Adjust this width as necessary for the first column */
+}
+
+/* Hover effect for table rows */
+tr:hover {
+    background-color: #ff000015;
+}
+
+/* Responsive table */
+@media (max-width: 600px) {
+    table {
+        width: 100%;
+        display: block;
+        overflow-x: auto;
+    }
+}
diff --git a/app.js b/app.js
index af610fe..ce6a4cc 100644
--- a/app.js
+++ b/app.js
@@ -2,29 +2,76 @@ const jobslist = document.getElementById('jobslist');
 const searchBar = document.getElementById('searchBar');
 const mainCategory = document.getElementById('mainCategory');
 let jobData = [];
+let displayedJobs = []; // jobs rendered so far; grows by one chunk per loadMoreJobs call
+let filteredJobs = null; // null = no active search; an array (possibly empty) = current search results
+const chunkSize = 20; // Number of jobs to load at a time
+let currentIndex = 0; // offset of the next chunk within the list being paged
 
 // Event listener for the search bar
 searchBar.addEventListener('keyup', (e) => {
     const searchString = e.target.value.toLowerCase();
     const searchWords = searchString.split(' ').filter(word => word.length > 0);
+    // Debouncing for search to reduce load during typing
+    clearTimeout(window.searchTimeout);
+    window.searchTimeout = setTimeout(() => {
+        if (searchWords.length === 0) {
+            resetJobs(); // Reset to original job data
+            return;
+        }
+        filteredJobs = filterJobs(searchWords); // may be empty: "no matches" is still an active search
+        displayedJobs = []; // Clear displayed jobs for new search results
+        currentIndex = 0; // Reset index for filtered results
+        loadMoreJobs(); // Load the first chunk of filtered jobs
+    }, 300); // Adjust timeout as needed
+});
 
-    if (searchWords.length === 0) {
-        displayJobs(jobData.flatMap(category => {
-            return Object.entries(category.jobs).map(([jobTitle, job]) => ({
-                main_category: category.main_category,
-                jobTitle,
-                job
-            }));
-        }));
-        return;
+// Function to load jobs data
+const loadJobs = async () => {
+    try {
+        const res = await fetch('jobs.json'); // Adjust the path if necessary
+        if (!res.ok) throw new Error(`Failed to load jobs.json: HTTP ${res.status}`); // fetch resolves on HTTP errors too
+        jobData = await res.json(); // Load all categories and jobs
+        loadMoreJobs(); // Load the initial chunk of jobs
+    } catch (err) {
+        console.error(err);
     }
+};
+
+// Function to load more jobs: renders the next chunk of the search results, or of all jobs when no search is active
+const loadMoreJobs = () => {
+    const jobsToLoad = filteredJobs !== null ? filteredJobs : jobData.flatMap(category =>
+        Object.entries(category.jobs).map(([jobTitle, job]) => ({
+            main_category: category.main_category,
+            jobTitle,
+            job
+        })));
+    const nextJobs = jobsToLoad.slice(currentIndex, currentIndex + chunkSize);
+    if (nextJobs.length === 0) {
+        if (currentIndex === 0) displayJobs([]); // the list itself is empty (e.g. no search matches): show nothing
+        return; // No more jobs to load
+    }
+    displayedJobs = [...displayedJobs, ...nextJobs];
+    currentIndex += chunkSize;
 
-    const filteredJobs = jobData.flatMap(category => {
+    displayJobs(displayedJobs);
+};
+
+// Function to reset jobs when search input is cleared
+const resetJobs = () => {
+    filteredJobs = null; // leave search mode so paging returns to the full list
+    displayedJobs = [];
+    currentIndex = 0;
+    loadMoreJobs(); // Load the first chunk again
+};
+
+// Function to filter jobs based on search input
+const filterJobs = (searchWords) => {
+    return jobData.flatMap(category => {
         return Object.entries(category.jobs)
             .filter(([jobTitle, job]) => {
                 const jobTitleMatch = searchWords.some(word => jobTitle.toLowerCase().includes(word));
                 const linksMatch = job.links.some(link =>
-                    searchWords.some(word => 
+                    searchWords.some(word =>
                         link.url.toLowerCase().includes(word) || 
                         link.category.toLowerCase().includes(word)
                     )
@@ -37,25 +84,6 @@ searchBar.addEventListener('keyup', (e) => {
                 job
             }));
     });
-
-    displayJobs(filteredJobs);
-});
-
-// Function to load jobs data
-const loadJobs = async () => {
-    try {
-        const res = await fetch('jobs.json'); // Adjust the path if necessary
-        jobData = await res.json(); // Load all categories and jobs
-        displayJobs(jobData.flatMap(category => {
-            return Object.entries(category.jobs).map(([jobTitle, job]) => ({
-                main_category: category.main_category,
-                jobTitle,
-                job
-            }));
-        }));
-    } catch (err) {
-        console.error(err);
-    }
 };
 
 const displayJobs = (jobs) => {
@@ -64,42 +92,48 @@ const displayJobs = (jobs) => {
         const isNewCategory = main_category !== lastCategory;
         lastCategory = main_category;
 
-        const linksHtml = job.links.map(link => {
-            return `
-                <li class="link">
-                    <span class="category">${link.category}</span>
-                    <a href="${link.url}" target="_blank">${link.url}</a>
-                </li>
-            `;
-        }).join('');
-
-        const videosHtml = job.videos.map(video => {
-            const videoId = extractVideoId(video.url);
-            const thumbnailUrl = `https://img.youtube.com/vi/${videoId}/hqdefault.jpg`;
-
-            return `
-                <div class="video-wrapper" data-video-id="${videoId}" style="margin-bottom: 20px;">
-                    <img src="${thumbnailUrl}" class="video-thumbnail" alt="Video thumbnail" style="cursor: pointer;" />
-                </div>
-            `;
-        }).join('');
-
-        // Display degree required in a separate text box next to the job title
         return `
             ${isNewCategory ? `<h2 class="main-category">${main_category}</h2>` : ''}
             <div class="job-section">
                 <h3 class="job-title">${jobTitle}</h3>
-                <span class="degree-box">Degree Required: ${job.degree_required}</span> <!-- Degree info in a separate box -->
-                <ul class="links-list">${linksHtml}</ul>
-                <div class="videos-container">${videosHtml}</div>
+                <span class="degree-box">Degree Required: ${job.degree_required}</span>
+                <ul class="links-list">${generateLinksHtml(job.links)}</ul>
+                <div class="videos-container">${generateVideosHtml(job.videos)}</div>
+                <div class="jobs-table">${job.jobs_table || ''}</div>
             </div>
         `;
     }).join('');
 
     jobslist.innerHTML = htmlString;
     updateMainCategory(jobs);
+    setupVideoThumbnails();
+};
+
+// Render a job's links as <li> items: the link's category label followed by
+// its URL as an anchor that opens in a new tab. Returns '' for an empty list.
+// Values are interpolated into the markup as-is.
+const generateLinksHtml = (links) => links.map(({ category, url }) => `
+        <li class="link">
+            <span class="category">${category}</span>
+            <a href="${url}" target="_blank">${url}</a>
+        </li>
+    `).join('');
 
-    // Add event listeners for lazy loading videos
+// Build the thumbnail markup for a job's videos. Each wrapper records the
+// extracted video ID in data-video-id and shows the hqdefault.jpg thumbnail
+// from img.youtube.com for that ID.
+const generateVideosHtml = (videos) => videos.map(({ url }) => {
+    const videoId = extractVideoId(url);
+    const thumbnailUrl = `https://img.youtube.com/vi/${videoId}/hqdefault.jpg`;
+    return `
+            <div class="video-wrapper" data-video-id="${videoId}" style="margin-bottom: 20px;">
+                <img src="${thumbnailUrl}" class="video-thumbnail lazy" alt="Video thumbnail" style="cursor: pointer;" />
+            </div>
+        `;
+}).join('');
+
+// Function to setup lazy loading of videos
+const setupVideoThumbnails = () => {
     document.querySelectorAll('.video-thumbnail').forEach(thumbnail => {
         thumbnail.addEventListener('click', function() {
             const videoWrapper = this.closest('.video-wrapper');
@@ -134,5 +168,12 @@ const extractVideoId = (url) => {
     return urlParams.get('v') || url.split('/').pop();
 };
 
+// Infinite scrolling: load more jobs shortly before the bottom (scrollY may be fractional and never reach it exactly)
+window.addEventListener('scroll', () => {
+    if (window.innerHeight + window.scrollY >= document.body.offsetHeight - 100) {
+        loadMoreJobs();
+    }
+});
+
 // Load jobs data when the page is ready
 document.addEventListener('DOMContentLoaded', loadJobs);
diff --git a/job tables.html b/job tables.html
index 8da62ba..e4559f2 100644
--- a/job tables.html	
+++ b/job tables.html	
@@ -1,11 +1,4 @@
-
-    <html>
-        <head>
-            <link rel="stylesheet" type="text/css" href="jobs.css">
-            <title>Quick Facts Table</title>
-        </head>
-        <body>
-            <table class="regular-text tooltips" id="quickfacts">
+<table class="regular-text tooltips" id="quickfacts">
 <thead>
 <tr><th colspan="2">Quick Facts: Real Estate Brokers and Sales Agents</th></tr>
 </thead>
@@ -36,7 +29,4 @@
 <td headers="quickfacts-7-0 quickfacts-0-0">    10,100</td>
 </tr>
 <tr><th colspan="2">Source: <a href="https://www.bls.gov/ooh/sales/real-estate-brokers-and-sales-agents.htm#tab-1">www.bls.gov</a></th></tr></tbody>
-</table>
-        </body>
-    </html>
-    
\ No newline at end of file
+</table>
\ No newline at end of file
diff --git a/jobs.json b/jobs.json
index be7b8e3..43cb741 100644
--- a/jobs.json
+++ b/jobs.json
@@ -23,7 +23,8 @@
                         "url": "https://www.youtube.com/watch?v=ovjmCc1R6EU"
                     }
                 ],
-                "degree_required": "Bachelor's"
+                "degree_required": "Bachelor's",
+                "jobs_table": "<table class=\"regular-text tooltips\" id=\"quickfacts\">\n<thead>\n<tr><th colspan=\"2\">Quick Facts Table</th></tr>\n</thead>\n<tbody>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-1-0\" role=\"rowheader\">\t\t\t\t\t\t\t\t\t\t2023     Median Pay\t\t\t\t\t\t\t\t\t\t </th>\n<td headers=\"quickfacts-1-0 quickfacts-0-0\">\n                                        $61,270     per year\n                                        <br/>$29.46     per hour\n                                    </td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-2-0\" role=\"rowheader\">    Typical Entry-Level Education</th>\n<td headers=\"quickfacts-2-0 quickfacts-0-0\">    High school diploma or equivalent</td>\n</tr>\n\n\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-5-0\" role=\"rowheader\">    Number of Jobs, 2023</th>\n<td headers=\"quickfacts-5-0 quickfacts-0-0\">    111,600</td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-6-0\" role=\"rowheader\">    Job Outlook, 2023-33</th>\n<td headers=\"quickfacts-6-0 quickfacts-0-0\">-7% (Decline)</td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-7-0\" role=\"rowheader\">    Employment Change, 2023-33</th>\n<td headers=\"quickfacts-7-0 quickfacts-0-0\">    -7,400</td>\n</tr>\n<tr><th colspan=\"2\">Source: <a href=\"http://www.bls.gov/ooh/sales/advertising-sales-agents.htm\">www.bls.gov</a></th></tr></tbody>\n</table>"
             },
             "Assistant Bank Manager": {
                 "links": [
@@ -4450,7 +4451,8 @@
                     }
                 ],
                 "videos": [],
-                "degree_required": "Bachelor's"
+                "degree_required": "Bachelor's",
+                "jobs_table": "<table class=\"regular-text tooltips\" id=\"quickfacts\">\n<thead>\n<tr><th colspan=\"2\">Quick Facts Table</th></tr>\n</thead>\n<tbody>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-1-0\" role=\"rowheader\">\t\t\t\t\t\t\t\t\t\t2023     Median Pay\t\t\t\t\t\t\t\t\t\t </th>\n<td headers=\"quickfacts-1-0 quickfacts-0-0\">\n                                        $71,540     per year\n                                        <br/>$34.39     per hour\n                                    </td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-2-0\" role=\"rowheader\">    Typical Entry-Level Education</th>\n<td headers=\"quickfacts-2-0 quickfacts-0-0\">    Bachelor's degree</td>\n</tr>\n\n\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-5-0\" role=\"rowheader\">    Number of Jobs, 2023</th>\n<td headers=\"quickfacts-5-0 quickfacts-0-0\">    8,200</td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-6-0\" role=\"rowheader\">    Job Outlook, 2023-33</th>\n<td headers=\"quickfacts-6-0 quickfacts-0-0\">6% (Faster than average)</td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-7-0\" role=\"rowheader\">    Employment Change, 2023-33</th>\n<td headers=\"quickfacts-7-0 quickfacts-0-0\">    500</td>\n</tr>\n<tr><th colspan=\"2\">Source: <a href=\"http://www.bls.gov/ooh/legal/arbitrators-mediators-and-conciliators.htm#tab-2\">www.bls.gov</a></th></tr></tbody>\n</table>"
             },
             "Conservation Officer": {
                 "links": [
@@ -4713,7 +4715,8 @@
                         "url": "https://www.youtube.com/watch?v=vh4tRwjfTsA"
                     }
                 ],
-                "degree_required": "Bachelor's"
+                "degree_required": "Bachelor's",
+                "jobs_table": "<table class=\"regular-text tooltips\" id=\"quickfacts\">\n<thead>\n<tr><th colspan=\"2\">Quick Facts Table</th></tr>\n</thead>\n<tbody>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-1-0\" role=\"rowheader\">\t\t\t\t\t\t\t\t\t\t2023     Median Pay\t\t\t\t\t\t\t\t\t\t </th>\n<td headers=\"quickfacts-1-0 quickfacts-0-0\">\n                                        $60,970     per year\n                                        <br/>$29.31     per hour\n                                    </td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-2-0\" role=\"rowheader\">    Typical Entry-Level Education</th>\n<td headers=\"quickfacts-2-0 quickfacts-0-0\">    Associate's degree</td>\n</tr>\n\n\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-5-0\" role=\"rowheader\">    Number of Jobs, 2023</th>\n<td headers=\"quickfacts-5-0 quickfacts-0-0\">    366,200</td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-6-0\" role=\"rowheader\">    Job Outlook, 2023-33</th>\n<td headers=\"quickfacts-6-0 quickfacts-0-0\">1% (Slower than average)</td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-7-0\" role=\"rowheader\">    Employment Change, 2023-33</th>\n<td headers=\"quickfacts-7-0 quickfacts-0-0\">    4,300</td>\n</tr>\n<tr><th colspan=\"2\">Source: <a href=\"http://www.bls.gov/ooh/legal/paralegals-and-legal-assistants.htm\">www.bls.gov</a></th></tr></tbody>\n</table>"
             },
             "Police Officer": {
                 "links": [
@@ -9119,7 +9122,8 @@
                         "url": "http://www.youtube.com/watch?v=YrYs_9lEGyY"
                     }
                 ],
-                "degree_required": "Bachelor's"
+                "degree_required": "Bachelor's",
+                "jobs_table": "<table class=\"regular-text tooltips\" id=\"quickfacts\">\n<thead>\n<tr><th colspan=\"2\">Quick Facts Table</th></tr>\n</thead>\n<tbody>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-1-0\" role=\"rowheader\">\t\t\t\t\t\t\t\t\t\t2023     Median Pay\t\t\t\t\t\t\t\t\t\t </th>\n<td headers=\"quickfacts-1-0 quickfacts-0-0\">\n                                        $99,700     per year\n                                        <br/>$47.94     per hour\n                                    </td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-2-0\" role=\"rowheader\">    Typical Entry-Level Education</th>\n<td headers=\"quickfacts-2-0 quickfacts-0-0\">    Bachelor's degree</td>\n</tr>\n\n\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-5-0\" role=\"rowheader\">    Number of Jobs, 2023</th>\n<td headers=\"quickfacts-5-0 quickfacts-0-0\">    139,400</td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-6-0\" role=\"rowheader\">    Job Outlook, 2023-33</th>\n<td headers=\"quickfacts-6-0 quickfacts-0-0\">-10% (Decline)</td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-7-0\" role=\"rowheader\">    Employment Change, 2023-33</th>\n<td headers=\"quickfacts-7-0 quickfacts-0-0\">    -13,400</td>\n</tr>\n<tr><th colspan=\"2\">Source: <a href=\"http://www.bls.gov/ooh/computer-and-information-technology/computer-programmers.htm\">www.bls.gov</a></th></tr></tbody>\n</table>"
             },
             "Data Base Administrator": {
                 "links": [
@@ -9353,7 +9357,8 @@
                         "url": "https://www.youtube.com/watch?v=trvCfinprNw"
                     }
                 ],
-                "degree_required": "Bachelor's"
+                "degree_required": "Bachelor's",
+                "jobs_table": "<table class=\"regular-text tooltips\" id=\"quickfacts\">\n<thead>\n<tr><th colspan=\"2\">Quick Facts Table</th></tr>\n</thead>\n<tbody>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-1-0\" role=\"rowheader\">\t\t\t\t\t\t\t\t\t\t2023     Median Pay\t\t\t\t\t\t\t\t\t\t </th>\n<td headers=\"quickfacts-1-0 quickfacts-0-0\">\n                                        $65,450     per year\n                                        <br/>$31.47     per hour\n                                    </td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-2-0\" role=\"rowheader\">    Typical Entry-Level Education</th>\n<td headers=\"quickfacts-2-0 quickfacts-0-0\">     See How to Become One</td>\n</tr>\n\n\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-5-0\" role=\"rowheader\">    Number of Jobs, 2023</th>\n<td headers=\"quickfacts-5-0 quickfacts-0-0\">    52,100</td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-6-0\" role=\"rowheader\">    Job Outlook, 2023-33</th>\n<td headers=\"quickfacts-6-0 quickfacts-0-0\">21% (Much faster than average)</td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-7-0\" role=\"rowheader\">    Employment Change, 2023-33</th>\n<td headers=\"quickfacts-7-0 quickfacts-0-0\">    10,800</td>\n</tr>\n<tr><th colspan=\"2\">Source: <a href=\"http://www.bls.gov/ooh/healthcare/occupational-therapy-assistants-and-aides.htm\">www.bls.gov</a></th></tr></tbody>\n</table>"
             },
             "Physical Therapist Assistant": {
                 "links": [
@@ -9952,7 +9957,8 @@
                     }
                 ],
                 "videos": [],
-                "degree_required": "Bachelor's"
+                "degree_required": "Bachelor's",
+                "jobs_table": "<table class=\"regular-text tooltips\" id=\"quickfacts\">\n<thead>\n<tr><th colspan=\"2\">Quick Facts Table</th></tr>\n</thead>\n<tbody>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-1-0\" role=\"rowheader\">\t\t\t\t\t\t\t\t\t\t2023     Median Pay\t\t\t\t\t\t\t\t\t\t </th>\n<td headers=\"quickfacts-1-0 quickfacts-0-0\">\n                                        \n                                        $20.50     per hour\n                                    </td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-2-0\" role=\"rowheader\">    Typical Entry-Level Education</th>\n<td headers=\"quickfacts-2-0 quickfacts-0-0\">    Some college, no degree</td>\n</tr>\n\n\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-5-0\" role=\"rowheader\">    Number of Jobs, 2023</th>\n<td headers=\"quickfacts-5-0 quickfacts-0-0\">    76,000</td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-6-0\" role=\"rowheader\">    Job Outlook, 2023-33</th>\n<td headers=\"quickfacts-6-0 quickfacts-0-0\">5% (As fast as average)</td>\n</tr>\n<tr>\n<th headers=\"quickfacts-0-0\" id=\"quickfacts-7-0\" role=\"rowheader\">    Employment Change, 2023-33</th>\n<td headers=\"quickfacts-7-0 quickfacts-0-0\">    3,500</td>\n</tr>\n<tr><th colspan=\"2\">Source: <a href=\"http://www.bls.gov/ooh/entertainment-and-sports/actors.htm\">www.bls.gov</a></th></tr></tbody>\n</table>"
             },
             "Airline Pilot": {
                 "links": [
diff --git a/make_website_abridged.ipynb b/make_website_abridged.ipynb
index 19636bc..ca54820 100644
--- a/make_website_abridged.ipynb
+++ b/make_website_abridged.ipynb
@@ -17,7 +17,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -161,11 +161,11 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "source": [
+    "## Generating a site with jobs tables in json file"
+   ]
   }
  ],
  "metadata": {
diff --git a/web_retrieval.ipynb b/web_retrieval.ipynb
index 7dc0e68..058eaf6 100644
--- a/web_retrieval.ipynb
+++ b/web_retrieval.ipynb
@@ -2628,14 +2628,38 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Quick Facts table with source link has been successfully saved to 'job tables.html'\n"
+      "Error: Unable to find the Quick Facts table. Exception: Message: no such window: target window already closed\n",
+      "from unknown error: web view not found\n",
+      "  (Session info: MicrosoftEdge=130.0.2849.46)\n",
+      "Stacktrace:\n",
+      "\tGetHandleVerifier [0x00007FF69682DC75+12853]\n",
+      "\tMicrosoft::Applications::Events::EventProperty::empty [0x00007FF696AD83C4+2250276]\n",
+      "\tMicrosoft::Applications::Events::EventProperty::empty [0x00007FF696A176A6+1460486]\n",
+      "\t(No symbol) [0x00007FF6965C84C7]\n",
+      "\t(No symbol) [0x00007FF69664CBED]\n",
+      "\t(No symbol) [0x00007FF696660D7A]\n",
+      "\t(No symbol) [0x00007FF696648163]\n",
+      "\t(No symbol) [0x00007FF69661FB54]\n",
+      "\t(No symbol) [0x00007FF69661F000]\n",
+      "\t(No symbol) [0x00007FF69661F741]\n",
+      "\tMicrosoft::Applications::Events::EventProperty::empty [0x00007FF6969ACDB4+1024020]\n",
+      "\tMicrosoft::Applications::Events::EventProperty::~EventProperty [0x00007FF696749A04+57108]\n",
+      "\tMicrosoft::Applications::Events::EventProperty::empty [0x00007FF6969ABDB3+1019923]\n",
+      "\tMicrosoft::Applications::Events::EventProperty::empty [0x00007FF6969ABA19+1019001]\n",
+      "\tMicrosoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6967CE031+394913]\n",
+      "\tMicrosoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6967CA954+380868]\n",
+      "\tMicrosoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6967CAA99+381193]\n",
+      "\tMicrosoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6967C2296+346374]\n",
+      "\tBaseThreadInitThunk [0x00007FF8A22F1FD7+23]\n",
+      "\tRtlUserThreadStart [0x00007FF8A2C5D7D0+32]\n",
+      "\n"
      ]
     }
    ],
@@ -2735,6 +2759,1028 @@
     "# Close the WebDriver after you're done\n",
     "# driver.quit()  # Uncomment this line to close the browser after use\n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[2], line 219\u001b[0m\n\u001b[0;32m    216\u001b[0m output_file \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mjobs.json\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m    217\u001b[0m url \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://www.bls.gov/ooh/sales/real-estate-brokers-and-sales-agents.htm#tab-1\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 219\u001b[0m \u001b[43mmain\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart_line\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend_line\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput_file\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m\n",
+      "Cell \u001b[1;32mIn[2], line 200\u001b[0m, in \u001b[0;36mmain\u001b[1;34m(file_path, start_line, end_line, output_file, url)\u001b[0m\n\u001b[0;32m    198\u001b[0m lines \u001b[38;5;241m=\u001b[39m read_docx(file_path, start_line, end_line)\n\u001b[0;32m    199\u001b[0m job_data \u001b[38;5;241m=\u001b[39m parse_lines(lines)  \u001b[38;5;66;03m# Parse the job data from the Word document\u001b[39;00m\n\u001b[1;32m--> 200\u001b[0m html_table \u001b[38;5;241m=\u001b[39m \u001b[43mscrape_quick_facts\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m  \u001b[38;5;66;03m# Scrape the HTML table\u001b[39;00m\n\u001b[0;32m    202\u001b[0m \u001b[38;5;66;03m# Combine the job data with the HTML table\u001b[39;00m\n\u001b[0;32m    203\u001b[0m combined_data \u001b[38;5;241m=\u001b[39m {\n\u001b[0;32m    204\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mjobs\u001b[39m\u001b[38;5;124m\"\u001b[39m: job_data,\n\u001b[0;32m    205\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mjobs_table\u001b[39m\u001b[38;5;124m\"\u001b[39m: html_table\n\u001b[0;32m    206\u001b[0m }\n",
+      "Cell \u001b[1;32mIn[2], line 134\u001b[0m, in \u001b[0;36mscrape_quick_facts\u001b[1;34m(url)\u001b[0m\n\u001b[0;32m    131\u001b[0m driver\u001b[38;5;241m.\u001b[39mget(url)\n\u001b[0;32m    133\u001b[0m \u001b[38;5;66;03m# Allow time for the page to load\u001b[39;00m\n\u001b[1;32m--> 134\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m)\u001b[49m  \u001b[38;5;66;03m# Increase if necessary to allow the page to fully load\u001b[39;00m\n\u001b[0;32m    136\u001b[0m \u001b[38;5;66;03m# Try to find the Quick Facts table\u001b[39;00m\n\u001b[0;32m    137\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m    138\u001b[0m     \u001b[38;5;66;03m# Locate the Quick Facts table by its ID\u001b[39;00m\n",
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "import re\n",
+    "import json\n",
+    "from docx import Document\n",
+    "from docx.oxml.ns import qn\n",
+    "from selenium import webdriver\n",
+    "from selenium.webdriver.edge.service import Service\n",
+    "from selenium.webdriver.common.by import By\n",
+    "from webdriver_manager.microsoft import EdgeChromiumDriverManager\n",
+    "from bs4 import BeautifulSoup\n",
+    "import time\n",
+    "\n",
+    "# Function to check if run is highlighted and get the highlight color\n",
+    "def get_highlight_color(run):\n",
+    "    highlight_elements = run.element.xpath('.//w:highlight')\n",
+    "    if highlight_elements:\n",
+    "        highlight_element = highlight_elements[0]\n",
+    "        color = highlight_element.get(qn('w:val'))\n",
+    "        return color\n",
+    "    return None\n",
+    "\n",
+    "def read_docx(file_path, start_line, end_line):\n",
+    "    doc = Document(file_path)\n",
+    "    lines = []\n",
+    "    line_count = 0\n",
+    "    for paragraph in doc.paragraphs:\n",
+    "        if start_line <= line_count < end_line:\n",
+    "            para_text = paragraph.text.strip()\n",
+    "            highlight_color = None\n",
+    "            for run in paragraph.runs:\n",
+    "                color = get_highlight_color(run)\n",
+    "                if color:\n",
+    "                    highlight_color = color  # Capture the first highlighted color in the paragraph\n",
+    "                    break\n",
+    "            lines.append((para_text, highlight_color))  # Store line and highlight color as a tuple\n",
+    "        line_count += 1\n",
+    "    return lines\n",
+    "\n",
+    "def parse_lines(lines):\n",
+    "    url_pattern = re.compile(r'https?://\\S+')\n",
+    "    youtube_pattern = re.compile(r'(https?://(?:www\\.)?youtube\\.com/watch\\?v=[\\w-]+|https?://(?:www\\.)?youtu\\.be/[\\w-]+)')\n",
+    "    data = []\n",
+    "    current_main_category = None\n",
+    "    current_job = None\n",
+    "    jobs = {}\n",
+    "\n",
+    "    for line, highlight_color in lines:\n",
+    "        youtube_match = youtube_pattern.search(line)\n",
+    "        if youtube_match:\n",
+    "            # Extract the YouTube URL\n",
+    "            youtube_url = youtube_match.group()\n",
+    "            video_id = re.search(r'(?:v=|youtu\\.be/)([\\w-]+)', youtube_url).group(1)\n",
+    "            \n",
+    "            # Store the video ID instead of the iframe HTML\n",
+    "            if current_job and current_job in jobs:\n",
+    "                jobs[current_job]['videos'].append({\n",
+    "                    'video_id': video_id,  # Store only the video ID\n",
+    "                    'url': youtube_url      # Optionally store the original URL\n",
+    "                })\n",
+    "        elif url_pattern.search(line):\n",
+    "            # Extract the URL\n",
+    "            url_match = url_pattern.search(line)\n",
+    "            url = url_match.group()\n",
+    "            \n",
+    "            # Extract the category (everything before the URL)\n",
+    "            category = line[:url_match.start()].strip()\n",
+    "\n",
+    "            if current_job and current_job in jobs:\n",
+    "                jobs[current_job]['links'].append({\n",
+    "                    'url': url,\n",
+    "                    'category': category\n",
+    "                })\n",
+    "        elif line.lower().startswith(\"undefined\"):\n",
+    "            # Skip lines starting with \"undefined\"\n",
+    "            continue\n",
+    "        else:\n",
+    "            if current_main_category is None:\n",
+    "                # Set the main category\n",
+    "                current_main_category = line\n",
+    "            elif current_job is None:\n",
+    "                # Set the job title (sub_category)\n",
+    "                current_job = line\n",
+    "                jobs[current_job] = {\n",
+    "                    'links': [],\n",
+    "                    'videos': [],\n",
+    "                    'degree_required': \"Bachelor's\" if highlight_color == 'cyan' else \"Graduate's\" if highlight_color == 'green' else ''\n",
+    "                }\n",
+    "            else:\n",
+    "                # Handle a new main category if a new line appears\n",
+    "                if line.strip() == \"\":\n",
+    "                    if current_main_category:\n",
+    "                        # Save the current main category and its jobs\n",
+    "                        data.append({\n",
+    "                            'main_category': current_main_category,\n",
+    "                            'jobs': jobs\n",
+    "                        })\n",
+    "                        # Reset for the next main category\n",
+    "                        current_main_category = None\n",
+    "                        jobs = {}\n",
+    "                        current_job = None\n",
+    "                else:\n",
+    "                    # If it's neither a URL nor an empty line, it might be a new job\n",
+    "                    if current_job:\n",
+    "                        # Make sure to add the current job to jobs before changing\n",
+    "                        current_job = line\n",
+    "                        jobs[current_job] = {\n",
+    "                            'links': [],\n",
+    "                            'videos': [],\n",
+    "                            'degree_required': \"Bachelor's\" if highlight_color == 'cyan' else \"Graduate's\" if highlight_color == 'green' else ''\n",
+    "                        }\n",
+    "\n",
+    "    # Handle the last main category and jobs if they exist\n",
+    "    if current_main_category and jobs:\n",
+    "        data.append({\n",
+    "            'main_category': current_main_category,\n",
+    "            'jobs': jobs\n",
+    "        })\n",
+    "\n",
+    "    return data\n",
+    "\n",
+    "# Function to scrape the Quick Facts table and generate the HTML\n",
+    "def scrape_quick_facts(url):\n",
+    "    # Set up the Edge WebDriver\n",
+    "    options = webdriver.EdgeOptions()\n",
+    "    # Do not use the headless option to see the browser window\n",
+    "    # options.add_argument('--headless')  # Do not use this line\n",
+    "\n",
+    "    # Initialize the Edge WebDriver\n",
+    "    driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()), options=options)\n",
+    "\n",
+    "    # Open the URL\n",
+    "    driver.get(url)\n",
+    "\n",
+    "    # Allow time for the page to load\n",
+    "    time.sleep(5)  # Increase if necessary to allow the page to fully load\n",
+    "\n",
+    "    # Try to find the Quick Facts table\n",
+    "    try:\n",
+    "        # Locate the Quick Facts table by its ID\n",
+    "        quickfacts_table = driver.find_element(By.ID, 'quickfacts')\n",
+    "        \n",
+    "        # Get the outer HTML of the table\n",
+    "        quickfacts_html = quickfacts_table.get_attribute('outerHTML')\n",
+    "        \n",
+    "        # Parse the HTML using BeautifulSoup\n",
+    "        soup = BeautifulSoup(quickfacts_html, 'html.parser')\n",
+    "        \n",
+    "        # Remove all links\n",
+    "        for link in soup.find_all('a'):\n",
+    "            link.unwrap()  # Remove the link but keep the text\n",
+    "        \n",
+    "        # Remove specific rows\n",
+    "        rows_to_remove = [\"Work Experience in a Related Occupation\", \"On-the-job Training\"]\n",
+    "        for row in soup.find_all('tr'):\n",
+    "            header = row.find('th')\n",
+    "            if header and header.get_text(strip=True) in rows_to_remove:\n",
+    "                row.decompose()  # Remove the row entirely\n",
+    "        \n",
+    "        # Modify the first row to have a single cell that spans all columns\n",
+    "        first_row = soup.find('tr')\n",
+    "        first_cell = soup.new_tag('th')\n",
+    "        first_cell['colspan'] = '2'  # Set to span all columns\n",
+    "        first_cell.string = 'Quick Facts: Real Estate Brokers and Sales Agents'  # Change this to your desired title\n",
+    "        first_row.clear()  # Clear existing cells in the first row\n",
+    "        first_row.append(first_cell)  # Add the new cell\n",
+    "        \n",
+    "        # Add the source link at the bottom of the table\n",
+    "        source_row = soup.new_tag('tr')\n",
+    "        source_header = soup.new_tag('th')\n",
+    "        source_header['colspan'] = '2'  # Span across two columns\n",
+    "        source_header.string = 'Source: '\n",
+    "        \n",
+    "        # Create the source link\n",
+    "        source_link = soup.new_tag('a', href=url)\n",
+    "        source_link.string = 'www.bls.gov'\n",
+    "        source_header.append(source_link)\n",
+    "        \n",
+    "        source_row.append(source_header)\n",
+    "        soup.find('tbody').append(source_row)  # Add the source row to the table\n",
+    "\n",
+    "        # Get the updated HTML without links and excluded rows\n",
+    "        updated_quickfacts_html = str(soup)\n",
+    "\n",
+    "        # Save the complete HTML to a file\n",
+    "        with open(\"job tables.html\", \"w\", encoding=\"utf-8\") as file:\n",
+    "            file.write(updated_quickfacts_html)\n",
+    "\n",
+    "        print(\"Quick Facts table with source link has been successfully saved to 'job tables.html'\")\n",
+    "\n",
+    "        return updated_quickfacts_html  # Return the HTML for further processing\n",
+    "\n",
+    "    except Exception as e:\n",
+    "        print(\"Error: Unable to find the Quick Facts table. Exception:\", e)\n",
+    "    finally:\n",
+    "        # Close the WebDriver after you're done\n",
+    "        driver.quit()  # Close the browser\n",
+    "\n",
+    "def main(file_path, start_line, end_line, output_file, url):\n",
+    "    lines = read_docx(file_path, start_line, end_line)\n",
+    "    job_data = parse_lines(lines)  # Parse the job data from the Word document\n",
+    "    html_table = scrape_quick_facts(url)  # Scrape the HTML table\n",
+    "\n",
+    "    # Combine the job data with the HTML table\n",
+    "    combined_data = {\n",
+    "        \"jobs\": job_data,\n",
+    "        \"jobs_table\": html_table\n",
+    "    }\n",
+    "\n",
+    "    # Save the combined data to a JSON file\n",
+    "    with open(output_file, 'w') as f:\n",
+    "        json.dump(combined_data, f, indent=4)\n",
+    "\n",
+    "# Specify the file path and line range\n",
+    "file_path = 'careers.docx'\n",
+    "start_line = 171  # Starting line (inclusive)\n",
+    "end_line = start_line + 3000  # Adjust the ending line as needed\n",
+    "output_file = 'jobs.json'\n",
+    "url = \"https://www.bls.gov/ooh/sales/real-estate-brokers-and-sales-agents.htm#tab-1\"\n",
+    "\n",
+    "main(file_path, start_line, end_line, output_file, url)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[1], line 220\u001b[0m\n\u001b[0;32m    217\u001b[0m end_line \u001b[38;5;241m=\u001b[39m start_line \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m3000\u001b[39m  \u001b[38;5;66;03m# Adjust the ending line as needed\u001b[39;00m\n\u001b[0;32m    218\u001b[0m output_file \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mjobs.json\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m--> 220\u001b[0m \u001b[43mmain\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart_line\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend_line\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput_file\u001b[49m\u001b[43m)\u001b[49m\n",
+      "Cell \u001b[1;32mIn[1], line 205\u001b[0m, in \u001b[0;36mmain\u001b[1;34m(file_path, start_line, end_line, output_file)\u001b[0m\n\u001b[0;32m    202\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m bls_links:\n\u001b[0;32m    203\u001b[0m         \u001b[38;5;66;03m# Scrape the Quick Facts table for this job\u001b[39;00m\n\u001b[0;32m    204\u001b[0m         url \u001b[38;5;241m=\u001b[39m bls_links[\u001b[38;5;241m0\u001b[39m]  \u001b[38;5;66;03m# Use the first bls.gov link found\u001b[39;00m\n\u001b[1;32m--> 205\u001b[0m         html_table \u001b[38;5;241m=\u001b[39m \u001b[43mscrape_quick_facts\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m  \u001b[38;5;66;03m# Scrape the HTML table\u001b[39;00m\n\u001b[0;32m    206\u001b[0m         job_info[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mjobs_table\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m html_table  \u001b[38;5;66;03m# Add the HTML table to the job info\u001b[39;00m\n\u001b[0;32m    208\u001b[0m combined_data\u001b[38;5;241m.\u001b[39mappend(job_category)  \u001b[38;5;66;03m# Add the job category with all jobs to combined data\u001b[39;00m\n",
+      "Cell \u001b[1;32mIn[1], line 134\u001b[0m, in \u001b[0;36mscrape_quick_facts\u001b[1;34m(url)\u001b[0m\n\u001b[0;32m    131\u001b[0m driver\u001b[38;5;241m.\u001b[39mget(url)\n\u001b[0;32m    133\u001b[0m \u001b[38;5;66;03m# Allow time for the page to load\u001b[39;00m\n\u001b[1;32m--> 134\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m)\u001b[49m  \u001b[38;5;66;03m# Increase if necessary to allow the page to fully load\u001b[39;00m\n\u001b[0;32m    136\u001b[0m \u001b[38;5;66;03m# Try to find the Quick Facts table\u001b[39;00m\n\u001b[0;32m    137\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m    138\u001b[0m     \u001b[38;5;66;03m# Locate the Quick Facts table by its ID\u001b[39;00m\n",
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "import re\n",
+    "import json\n",
+    "from docx import Document\n",
+    "from docx.oxml.ns import qn\n",
+    "from selenium import webdriver\n",
+    "from selenium.webdriver.edge.service import Service\n",
+    "from selenium.webdriver.common.by import By\n",
+    "from webdriver_manager.microsoft import EdgeChromiumDriverManager\n",
+    "from bs4 import BeautifulSoup\n",
+    "import time\n",
+    "\n",
+    "# Function to check if run is highlighted and get the highlight color\n",
+    "def get_highlight_color(run):\n",
+    "    highlight_elements = run.element.xpath('.//w:highlight')\n",
+    "    if highlight_elements:\n",
+    "        highlight_element = highlight_elements[0]\n",
+    "        color = highlight_element.get(qn('w:val'))\n",
+    "        return color\n",
+    "    return None\n",
+    "\n",
+    "def read_docx(file_path, start_line, end_line):\n",
+    "    doc = Document(file_path)\n",
+    "    lines = []\n",
+    "    line_count = 0\n",
+    "    for paragraph in doc.paragraphs:\n",
+    "        if start_line <= line_count < end_line:\n",
+    "            para_text = paragraph.text.strip()\n",
+    "            highlight_color = None\n",
+    "            for run in paragraph.runs:\n",
+    "                color = get_highlight_color(run)\n",
+    "                if color:\n",
+    "                    highlight_color = color  # Capture the first highlighted color in the paragraph\n",
+    "                    break\n",
+    "            lines.append((para_text, highlight_color))  # Store line and highlight color as a tuple\n",
+    "        line_count += 1\n",
+    "    return lines\n",
+    "\n",
+    "def parse_lines(lines):\n",
+    "    url_pattern = re.compile(r'https?://\\S+')\n",
+    "    youtube_pattern = re.compile(r'(https?://(?:www\\.)?youtube\\.com/watch\\?v=[\\w-]+|https?://(?:www\\.)?youtu\\.be/[\\w-]+)')\n",
+    "    data = []\n",
+    "    current_main_category = None\n",
+    "    current_job = None\n",
+    "    jobs = {}\n",
+    "\n",
+    "    for line, highlight_color in lines:\n",
+    "        youtube_match = youtube_pattern.search(line)\n",
+    "        if youtube_match:\n",
+    "            # Extract the YouTube URL\n",
+    "            youtube_url = youtube_match.group()\n",
+    "            video_id = re.search(r'(?:v=|youtu\\.be/)([\\w-]+)', youtube_url).group(1)\n",
+    "            \n",
+    "            # Store the video ID instead of the iframe HTML\n",
+    "            if current_job and current_job in jobs:\n",
+    "                jobs[current_job]['videos'].append({\n",
+    "                    'video_id': video_id,  # Store only the video ID\n",
+    "                    'url': youtube_url      # Optionally store the original URL\n",
+    "                })\n",
+    "        elif url_pattern.search(line):\n",
+    "            # Extract the URL\n",
+    "            url_match = url_pattern.search(line)\n",
+    "            url = url_match.group()\n",
+    "            \n",
+    "            # Extract the category (everything before the URL)\n",
+    "            category = line[:url_match.start()].strip()\n",
+    "\n",
+    "            if current_job and current_job in jobs:\n",
+    "                jobs[current_job]['links'].append({\n",
+    "                    'url': url,\n",
+    "                    'category': category\n",
+    "                })\n",
+    "        elif line.lower().startswith(\"undefined\"):\n",
+    "            # Skip lines starting with \"undefined\"\n",
+    "            continue\n",
+    "        else:\n",
+    "            if current_main_category is None:\n",
+    "                # Set the main category\n",
+    "                current_main_category = line\n",
+    "            elif current_job is None:\n",
+    "                # Set the job title (sub_category)\n",
+    "                current_job = line\n",
+    "                jobs[current_job] = {\n",
+    "                    'links': [],\n",
+    "                    'videos': [],\n",
+    "                    'degree_required': \"Bachelor's\" if highlight_color == 'cyan' else \"Graduate's\" if highlight_color == 'green' else ''\n",
+    "                }\n",
+    "            else:\n",
+    "                # Handle a new main category if a new line appears\n",
+    "                if line.strip() == \"\":\n",
+    "                    if current_main_category:\n",
+    "                        # Save the current main category and its jobs\n",
+    "                        data.append({\n",
+    "                            'main_category': current_main_category,\n",
+    "                            'jobs': jobs\n",
+    "                        })\n",
+    "                        # Reset for the next main category\n",
+    "                        current_main_category = None\n",
+    "                        jobs = {}\n",
+    "                        current_job = None\n",
+    "                else:\n",
+    "                    # If it's neither a URL nor an empty line, it might be a new job\n",
+    "                    if current_job:\n",
+    "                        # Make sure to add the current job to jobs before changing\n",
+    "                        current_job = line\n",
+    "                        jobs[current_job] = {\n",
+    "                            'links': [],\n",
+    "                            'videos': [],\n",
+    "                            'degree_required': \"Bachelor's\" if highlight_color == 'cyan' else \"Graduate's\" if highlight_color == 'green' else ''\n",
+    "                        }\n",
+    "\n",
+    "    # Handle the last main category and jobs if they exist\n",
+    "    if current_main_category and jobs:\n",
+    "        data.append({\n",
+    "            'main_category': current_main_category,\n",
+    "            'jobs': jobs\n",
+    "        })\n",
+    "\n",
+    "    return data\n",
+    "\n",
+    "# Function to scrape the Quick Facts table and generate the HTML\n",
+    "def scrape_quick_facts(url):\n",
+    "    \"\"\"Open url in a new Edge window and return its Quick Facts table as tidied HTML.\n",
+    "\n",
+    "    The element with id 'quickfacts' is copied, its links are replaced by their\n",
+    "    text, the rows listed in rows_to_remove are dropped, the first row is\n",
+    "    replaced by a single title cell, and a 'Source:' row linking back to url\n",
+    "    is appended.\n",
+    "\n",
+    "    Args:\n",
+    "        url: Address of the page to scrape.\n",
+    "\n",
+    "    Returns:\n",
+    "        The modified table HTML as a string, or None when loading or\n",
+    "        processing failed (the exception is printed, not raised).\n",
+    "    \"\"\"\n",
+    "    # The browser is deliberately not headless so the window stays visible\n",
+    "    options = webdriver.EdgeOptions()\n",
+    "    driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()), options=options)\n",
+    "\n",
+    "    try:\n",
+    "        # Navigation happens inside the try so the finally block closes the\n",
+    "        # browser even when the page cannot be loaded\n",
+    "        driver.get(url)\n",
+    "        time.sleep(5)  # Fixed wait for the page to load; increase if necessary\n",
+    "\n",
+    "        # Copy the table out of the live page and parse it separately\n",
+    "        quickfacts_table = driver.find_element(By.ID, 'quickfacts')\n",
+    "        quickfacts_html = quickfacts_table.get_attribute('outerHTML')\n",
+    "        soup = BeautifulSoup(quickfacts_html, 'html.parser')\n",
+    "\n",
+    "        # Replace every link with its plain text\n",
+    "        for link in soup.find_all('a'):\n",
+    "            link.unwrap()\n",
+    "\n",
+    "        # Drop the rows whose header cell matches one of these labels\n",
+    "        rows_to_remove = [\"Work Experience in a Related Occupation\", \"On-the-job Training\"]\n",
+    "        for row in soup.find_all('tr'):\n",
+    "            header = row.find('th')\n",
+    "            if header and header.get_text(strip=True) in rows_to_remove:\n",
+    "                row.decompose()\n",
+    "\n",
+    "        # Replace the first row with a single title cell spanning both columns\n",
+    "        first_row = soup.find('tr')\n",
+    "        first_cell = soup.new_tag('th')\n",
+    "        first_cell['colspan'] = '2'\n",
+    "        first_cell.string = 'Quick Facts Table'\n",
+    "        first_row.clear()\n",
+    "        first_row.append(first_cell)\n",
+    "\n",
+    "        # Build the closing row that links back to the scraped page\n",
+    "        source_row = soup.new_tag('tr')\n",
+    "        source_header = soup.new_tag('th')\n",
+    "        source_header['colspan'] = '2'\n",
+    "        source_header.string = 'Source: '\n",
+    "        source_link = soup.new_tag('a', href=url)\n",
+    "        source_link.string = 'www.bls.gov'\n",
+    "        source_header.append(source_link)\n",
+    "        source_row.append(source_header)\n",
+    "\n",
+    "        # Fall back to the table (or the fragment root) when there is no <tbody>\n",
+    "        container = soup.find('tbody') or soup.find('table') or soup\n",
+    "        container.append(source_row)\n",
+    "\n",
+    "        return str(soup)\n",
+    "\n",
+    "    except Exception as e:\n",
+    "        print(\"Error: Unable to find the Quick Facts table. Exception:\", e)\n",
+    "        return None\n",
+    "    finally:\n",
+    "        # Always close the browser, whether scraping succeeded or not\n",
+    "        driver.quit()\n",
+    "\n",
+    "def main(file_path, start_line, end_line, output_file):\n",
+    "    \"\"\"Parse the Word document, attach bls.gov Quick Facts tables and write JSON.\n",
+    "\n",
+    "    Args:\n",
+    "        file_path: Path of the .docx file to read.\n",
+    "        start_line: First line of the range handed to read_docx.\n",
+    "        end_line: End of the range handed to read_docx.\n",
+    "        output_file: Path of the JSON file to write.\n",
+    "    \"\"\"\n",
+    "    parsed_categories = parse_lines(read_docx(file_path, start_line, end_line))\n",
+    "    combined_data = []\n",
+    "\n",
+    "    for category_entry in parsed_categories:\n",
+    "        for info in category_entry['jobs'].values():\n",
+    "            # Only jobs that carry at least one bls.gov link get a table\n",
+    "            bls_urls = [ref['url'] for ref in info['links'] if 'bls.gov' in ref['url']]\n",
+    "            if not bls_urls:\n",
+    "                continue\n",
+    "            # The first bls.gov link is the one that gets scraped\n",
+    "            info['jobs_table'] = scrape_quick_facts(bls_urls[0])\n",
+    "\n",
+    "        combined_data.append(category_entry)\n",
+    "\n",
+    "    # Write every category in one go once all jobs were handled\n",
+    "    with open(output_file, 'w') as out:\n",
+    "        json.dump(combined_data, out, indent=4)\n",
+    "\n",
+    "# Specify the file path and line range\n",
+    "file_path = 'careers.docx'\n",
+    "start_line = 171  # Starting line (inclusive)\n",
+    "end_line = start_line + 3000  # Adjust the ending line as needed\n",
+    "output_file = 'jobs.json'\n",
+    "\n",
+    "main(file_path, start_line, end_line, output_file)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processing job: Advertising Sales Representative / Agent / Executive in category: Business, Advertising, and Finance\n"
+     ]
+    },
+    {
+     "ename": "OSError",
+     "evalue": "[Errno 22] Invalid argument: 'jobs.json'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mOSError\u001b[0m                                   Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[1], line 216\u001b[0m\n\u001b[0;32m    213\u001b[0m end_line \u001b[38;5;241m=\u001b[39m start_line \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m3000\u001b[39m  \u001b[38;5;66;03m# Adjust the ending line as needed\u001b[39;00m\n\u001b[0;32m    214\u001b[0m output_file \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mjobs.json\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m--> 216\u001b[0m \u001b[43mmain\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart_line\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend_line\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput_file\u001b[49m\u001b[43m)\u001b[49m\n",
+      "Cell \u001b[1;32mIn[1], line 204\u001b[0m, in \u001b[0;36mmain\u001b[1;34m(file_path, start_line, end_line, output_file)\u001b[0m\n\u001b[0;32m    201\u001b[0m combined_data\u001b[38;5;241m.\u001b[39mappend(job_category)  \u001b[38;5;66;03m# Add the job category with all jobs to combined data\u001b[39;00m\n\u001b[0;32m    203\u001b[0m \u001b[38;5;66;03m# Save the combined data to a JSON file incrementally\u001b[39;00m\n\u001b[1;32m--> 204\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43moutput_file\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mw\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[0;32m    205\u001b[0m     json\u001b[38;5;241m.\u001b[39mdump(combined_data, f, indent\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m4\u001b[39m)\n\u001b[0;32m    206\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mProgress saved: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(combined_data)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m job categories processed.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python313\\site-packages\\IPython\\core\\interactiveshell.py:324\u001b[0m, in \u001b[0;36m_modified_open\u001b[1;34m(file, *args, **kwargs)\u001b[0m\n\u001b[0;32m    317\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file \u001b[38;5;129;01min\u001b[39;00m {\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m}:\n\u001b[0;32m    318\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m    319\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIPython won\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt let you open fd=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m by default \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    320\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mas it is likely to crash IPython. If you know what you are doing, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    321\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124myou can use builtins\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m open.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    322\u001b[0m     )\n\u001b[1;32m--> 324\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mio_open\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[1;31mOSError\u001b[0m: [Errno 22] Invalid argument: 'jobs.json'"
+     ]
+    }
+   ],
+   "source": [
+    "import re\n",
+    "import json\n",
+    "from docx import Document\n",
+    "from docx.oxml.ns import qn\n",
+    "from selenium import webdriver\n",
+    "from selenium.webdriver.edge.service import Service\n",
+    "from selenium.webdriver.common.by import By\n",
+    "from webdriver_manager.microsoft import EdgeChromiumDriverManager\n",
+    "from bs4 import BeautifulSoup\n",
+    "import time\n",
+    "\n",
+    "# Function to check if run is highlighted and get the highlight color\n",
+    "def get_highlight_color(run):\n",
+    "    \"\"\"Return the w:val of the first w:highlight element under the run, or None.\"\"\"\n",
+    "    matches = run.element.xpath('.//w:highlight')\n",
+    "    if not matches:\n",
+    "        return None\n",
+    "    return matches[0].get(qn('w:val'))\n",
+    "\n",
+    "def read_docx(file_path, start_line, end_line):\n",
+    "    \"\"\"Collect (text, highlight_color) tuples for a range of paragraphs.\n",
+    "\n",
+    "    Paragraphs are counted from 0 and kept when start_line <= index < end_line.\n",
+    "    The text is stripped; highlight_color is the first highlight value found\n",
+    "    among the paragraph's runs, or None when no run is highlighted.\n",
+    "    \"\"\"\n",
+    "    collected = []\n",
+    "    for index, paragraph in enumerate(Document(file_path).paragraphs):\n",
+    "        if index < start_line or index >= end_line:\n",
+    "            continue\n",
+    "        # Stop at the first run that reports a highlight colour\n",
+    "        first_color = next(\n",
+    "            (color for color in map(get_highlight_color, paragraph.runs) if color),\n",
+    "            None,\n",
+    "        )\n",
+    "        collected.append((paragraph.text.strip(), first_color))\n",
+    "    return collected\n",
+    "\n",
+    "def parse_lines(lines):\n",
+    "    url_pattern = re.compile(r'https?://\\S+')\n",
+    "    youtube_pattern = re.compile(r'(https?://(?:www\\.)?youtube\\.com/watch\\?v=[\\w-]+|https?://(?:www\\.)?youtu\\.be/[\\w-]+)')\n",
+    "    data = []\n",
+    "    current_main_category = None\n",
+    "    current_job = None\n",
+    "    jobs = {}\n",
+    "\n",
+    "    for line, highlight_color in lines:\n",
+    "        youtube_match = youtube_pattern.search(line)\n",
+    "        if youtube_match:\n",
+    "            # Extract the YouTube URL\n",
+    "            youtube_url = youtube_match.group()\n",
+    "            video_id = re.search(r'(?:v=|youtu\\.be/)([\\w-]+)', youtube_url).group(1)\n",
+    "            \n",
+    "            # Store the video ID instead of the iframe HTML\n",
+    "            if current_job and current_job in jobs:\n",
+    "                jobs[current_job]['videos'].append({\n",
+    "                    'video_id': video_id,  # Store only the video ID\n",
+    "                    'url': youtube_url      # Optionally store the original URL\n",
+    "                })\n",
+    "        elif url_pattern.search(line):\n",
+    "            # Extract the URL\n",
+    "            url_match = url_pattern.search(line)\n",
+    "            url = url_match.group()\n",
+    "            \n",
+    "            # Extract the category (everything before the URL)\n",
+    "            category = line[:url_match.start()].strip()\n",
+    "\n",
+    "            if current_job and current_job in jobs:\n",
+    "                jobs[current_job]['links'].append({\n",
+    "                    'url': url,\n",
+    "                    'category': category\n",
+    "                })\n",
+    "        elif line.lower().startswith(\"undefined\"):\n",
+    "            # Skip lines starting with \"undefined\"\n",
+    "            continue\n",
+    "        else:\n",
+    "            if current_main_category is None:\n",
+    "                # Set the main category\n",
+    "                current_main_category = line\n",
+    "            elif current_job is None:\n",
+    "                # Set the job title (sub_category)\n",
+    "                current_job = line\n",
+    "                jobs[current_job] = {\n",
+    "                    'links': [],\n",
+    "                    'videos': [],\n",
+    "                    'degree_required': \"Bachelor's\" if highlight_color == 'cyan' else \"Graduate's\" if highlight_color == 'green' else ''\n",
+    "                }\n",
+    "            else:\n",
+    "                # Handle a new main category if a new line appears\n",
+    "                if line.strip() == \"\":\n",
+    "                    if current_main_category:\n",
+    "                        # Save the current main category and its jobs\n",
+    "                        data.append({\n",
+    "                            'main_category': current_main_category,\n",
+    "                            'jobs': jobs\n",
+    "                        })\n",
+    "                        # Reset for the next main category\n",
+    "                        current_main_category = None\n",
+    "                        jobs = {}\n",
+    "                        current_job = None\n",
+    "                else:\n",
+    "                    # If it's neither a URL nor an empty line, it might be a new job\n",
+    "                    if current_job:\n",
+    "                        # Make sure to add the current job to jobs before changing\n",
+    "                        current_job = line\n",
+    "                        jobs[current_job] = {\n",
+    "                            'links': [],\n",
+    "                            'videos': [],\n",
+    "                            'degree_required': \"Bachelor's\" if highlight_color == 'cyan' else \"Graduate's\" if highlight_color == 'green' else ''\n",
+    "                        }\n",
+    "\n",
+    "    # Handle the last main category and jobs if they exist\n",
+    "    if current_main_category and jobs:\n",
+    "        data.append({\n",
+    "            'main_category': current_main_category,\n",
+    "            'jobs': jobs\n",
+    "        })\n",
+    "\n",
+    "    return data\n",
+    "\n",
+    "# Function to scrape the Quick Facts table and generate the HTML\n",
+    "def scrape_quick_facts(driver, url):\n",
+    "    \"\"\"Load url in the given driver and return its Quick Facts table as tidied HTML.\n",
+    "\n",
+    "    The element with id 'quickfacts' is copied, its links are replaced by their\n",
+    "    text, the rows listed in rows_to_remove are dropped, the first row is\n",
+    "    replaced by a single title cell, and a 'Source:' row linking back to url\n",
+    "    is appended.\n",
+    "\n",
+    "    Args:\n",
+    "        driver: WebDriver used to load the page; it is not closed here.\n",
+    "        url: Address of the page to scrape.\n",
+    "\n",
+    "    Returns:\n",
+    "        The modified table HTML as a string, or None when loading or\n",
+    "        processing failed (the exception is printed, not raised).\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        # Navigation is inside the try so a page that fails to load yields\n",
+    "        # None like every other failure instead of aborting the caller\n",
+    "        driver.get(url)\n",
+    "        time.sleep(5)  # Fixed wait for the page to load\n",
+    "\n",
+    "        # Copy the table out of the live page and parse it separately\n",
+    "        quickfacts_table = driver.find_element(By.ID, 'quickfacts')\n",
+    "        quickfacts_html = quickfacts_table.get_attribute('outerHTML')\n",
+    "        soup = BeautifulSoup(quickfacts_html, 'html.parser')\n",
+    "\n",
+    "        # Replace every link with its plain text\n",
+    "        for link in soup.find_all('a'):\n",
+    "            link.unwrap()\n",
+    "\n",
+    "        # Drop the rows whose header cell matches one of these labels\n",
+    "        rows_to_remove = [\"Work Experience in a Related Occupation\", \"On-the-job Training\"]\n",
+    "        for row in soup.find_all('tr'):\n",
+    "            header = row.find('th')\n",
+    "            if header and header.get_text(strip=True) in rows_to_remove:\n",
+    "                row.decompose()\n",
+    "\n",
+    "        # Replace the first row with a single title cell spanning both columns\n",
+    "        first_row = soup.find('tr')\n",
+    "        first_cell = soup.new_tag('th')\n",
+    "        first_cell['colspan'] = '2'\n",
+    "        first_cell.string = 'Quick Facts Table'\n",
+    "        first_row.clear()\n",
+    "        first_row.append(first_cell)\n",
+    "\n",
+    "        # Build the closing row that links back to the scraped page\n",
+    "        source_row = soup.new_tag('tr')\n",
+    "        source_header = soup.new_tag('th')\n",
+    "        source_header['colspan'] = '2'\n",
+    "        source_header.string = 'Source: '\n",
+    "        source_link = soup.new_tag('a', href=url)\n",
+    "        source_link.string = 'www.bls.gov'\n",
+    "        source_header.append(source_link)\n",
+    "        source_row.append(source_header)\n",
+    "\n",
+    "        # Fall back to the table (or the fragment root) when there is no <tbody>\n",
+    "        container = soup.find('tbody') or soup.find('table') or soup\n",
+    "        container.append(source_row)\n",
+    "\n",
+    "        return str(soup)\n",
+    "\n",
+    "    except Exception as e:\n",
+    "        print(\"Error: Unable to find the Quick Facts table. Exception:\", e)\n",
+    "        return None\n",
+    "\n",
+    "def main(file_path, start_line, end_line, output_file):\n",
+    "    \"\"\"Parse the Word document, attach bls.gov Quick Facts tables and write JSON.\n",
+    "\n",
+    "    One Edge WebDriver is shared by all scrapes. The output file is rewritten\n",
+    "    after each category so finished work survives a later failure.\n",
+    "\n",
+    "    Args:\n",
+    "        file_path: Path of the .docx file to read.\n",
+    "        start_line: First paragraph index to read (inclusive).\n",
+    "        end_line: Paragraph index at which reading stops (exclusive).\n",
+    "        output_file: Path of the JSON file to write.\n",
+    "    \"\"\"\n",
+    "    lines = read_docx(file_path, start_line, end_line)\n",
+    "    job_data = parse_lines(lines)\n",
+    "    combined_data = []\n",
+    "\n",
+    "    # Set up the Edge WebDriver\n",
+    "    options = webdriver.EdgeOptions()\n",
+    "    driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()), options=options)\n",
+    "\n",
+    "    try:\n",
+    "        for job_category in job_data:\n",
+    "            for job_title, job_info in job_category['jobs'].items():\n",
+    "                print(f\"Processing job: {job_title} in category: {job_category['main_category']}\")\n",
+    "\n",
+    "                # Only jobs that carry at least one bls.gov link get a table\n",
+    "                bls_links = [link['url'] for link in job_info['links'] if 'bls.gov' in link['url']]\n",
+    "                if bls_links:\n",
+    "                    # The first bls.gov link is the one that gets scraped\n",
+    "                    html_table = scrape_quick_facts(driver, bls_links[0])\n",
+    "                    if html_table:\n",
+    "                        job_info['jobs_table'] = html_table\n",
+    "\n",
+    "            # Append each category exactly once, after all of its jobs;\n",
+    "            # appending inside the job loop duplicated it once per job\n",
+    "            combined_data.append(job_category)\n",
+    "\n",
+    "            # Save the combined data to a JSON file incrementally\n",
+    "            with open(output_file, 'w') as f:\n",
+    "                json.dump(combined_data, f, indent=4)\n",
+    "            print(f\"Progress saved: {len(combined_data)} job categories processed.\")\n",
+    "    finally:\n",
+    "        # Close the browser even when scraping or saving raises\n",
+    "        driver.quit()\n",
+    "\n",
+    "# Specify the file path and line range\n",
+    "file_path = 'careers.docx'\n",
+    "start_line = 171  # Starting line (inclusive)\n",
+    "end_line = start_line + 3000  # Adjust the ending line as needed\n",
+    "output_file = 'jobs.json'\n",
+    "\n",
+    "main(file_path, start_line, end_line, output_file)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processing job: Advertising Sales Representative / Agent / Executive in category: Business, Advertising, and Finance\n",
+      "Progress saved: 1 job categories processed.\n",
+      "Processing job: Child Development Specialist in category: Children and Families\n",
+      "Progress saved: 2 job categories processed.\n",
+      "Processing job: Crisis Intervention Counselor in category: Counseling\n",
+      "Progress saved: 3 job categories processed.\n",
+      "Processing job: Admissions Evaluator in category: Education\n",
+      "Progress saved: 4 job categories processed.\n",
+      "Processing job: Child Life Specialist in category: Health and Medical Services\n",
+      "Progress saved: 5 job categories processed.\n",
+      "Processing job: Benefits Manager in category: Human Resources\n",
+      "Progress saved: 6 job categories processed.\n",
+      "Processing job: Arbitrator, Mediator, Conciliator, or Ombudsman in category: Law and Law Enforcement\n",
+      "Progress saved: 7 job categories processed.\n",
+      "Processing job: Paralegal or Legal Assistant in category: Loss Prevention Manager\n",
+      "Progress saved: 8 job categories processed.\n",
+      "Processing job: Army Infantry Officer in category: Military\n",
+      "Progress saved: 9 job categories processed.\n",
+      "Processing job: Academic Psychologist in category: Psychology\n",
+      "Error: Unable to find the Quick Facts table. Exception: Message: no such element: Unable to locate element: {\"method\":\"css selector\",\"selector\":\"[id=\"quickfacts\"]\"}\n",
+      "  (Session info: MicrosoftEdge=130.0.2849.46); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception\n",
+      "Stacktrace:\n",
+      "\tGetHandleVerifier [0x00007FF69682DC75+12853]\n",
+      "\tMicrosoft::Applications::Events::EventProperty::empty [0x00007FF696AD83C4+2250276]\n",
+      "\tMicrosoft::Applications::Events::EventProperty::empty [0x00007FF696A176A6+1460486]\n",
+      "\t(No symbol) [0x00007FF6966297CC]\n",
+      "\t(No symbol) [0x00007FF69662990C]\n",
+      "\t(No symbol) [0x00007FF696663857]\n",
+      "\t(No symbol) [0x00007FF69664853F]\n",
+      "\t(No symbol) [0x00007FF696620617]\n",
+      "\t(No symbol) [0x00007FF696661431]\n",
+      "\t(No symbol) [0x00007FF696648163]\n",
+      "\t(No symbol) [0x00007FF69661FB54]\n",
+      "\t(No symbol) [0x00007FF69661F000]\n",
+      "\t(No symbol) [0x00007FF69661F741]\n",
+      "\tMicrosoft::Applications::Events::EventProperty::empty [0x00007FF6969ACDB4+1024020]\n",
+      "\tMicrosoft::Applications::Events::EventProperty::~EventProperty [0x00007FF696749A04+57108]\n",
+      "\tMicrosoft::Applications::Events::EventProperty::empty [0x00007FF6969ABDB3+1019923]\n",
+      "\tMicrosoft::Applications::Events::EventProperty::empty [0x00007FF6969ABA19+1019001]\n",
+      "\tMicrosoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6967CE031+394913]\n",
+      "\tMicrosoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6967CA954+380868]\n",
+      "\tMicrosoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6967CAA99+381193]\n",
+      "\tMicrosoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6967C2296+346374]\n",
+      "\tBaseThreadInitThunk [0x00007FF8A22F1FD7+23]\n",
+      "\tRtlUserThreadStart [0x00007FF8A2C5D7D0+32]\n",
+      "\n",
+      "Progress saved: 10 job categories processed.\n",
+      "Processing job: Youth Minister in category: Religion and Spirituality\n",
+      "Progress saved: 11 job categories processed.\n",
+      "Processing job: Caseworker or Case Manager in category: Social and Human Services\n",
+      "Progress saved: 12 job categories processed.\n",
+      "Processing job: Activities Director in category: Sport, Recreation, and Fitness\n",
+      "Progress saved: 13 job categories processed.\n",
+      "Processing job: Computer Programmer in category: Technology\n",
+      "Progress saved: 14 job categories processed.\n",
+      "Processing job: Occupational Therapist Assistant in category: Therapy\n",
+      "Progress saved: 15 job categories processed.\n",
+      "Processing job: Actor in category: Other\n",
+      "Progress saved: 16 job categories processed.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import re\n",
+    "import json\n",
+    "from docx import Document\n",
+    "from docx.oxml.ns import qn\n",
+    "from selenium import webdriver\n",
+    "from selenium.webdriver.edge.service import Service\n",
+    "from selenium.webdriver.common.by import By\n",
+    "from webdriver_manager.microsoft import EdgeChromiumDriverManager\n",
+    "from bs4 import BeautifulSoup\n",
+    "import time\n",
+    "\n",
+    "# Function to check if run is highlighted and get the highlight color\n",
+    "def get_highlight_color(run):\n",
+    "    \"\"\"Return the w:val of the first w:highlight element under the run, or None.\"\"\"\n",
+    "    matches = run.element.xpath('.//w:highlight')\n",
+    "    if not matches:\n",
+    "        return None\n",
+    "    return matches[0].get(qn('w:val'))\n",
+    "\n",
+    "def read_docx(file_path, start_line, end_line):\n",
+    "    \"\"\"Collect (text, highlight_color) tuples for a range of paragraphs.\n",
+    "\n",
+    "    Paragraphs are counted from 0 and kept when start_line <= index < end_line.\n",
+    "    The text is stripped; highlight_color is the first highlight value found\n",
+    "    among the paragraph's runs, or None when no run is highlighted.\n",
+    "    \"\"\"\n",
+    "    collected = []\n",
+    "    for index, paragraph in enumerate(Document(file_path).paragraphs):\n",
+    "        if index < start_line or index >= end_line:\n",
+    "            continue\n",
+    "        # Stop at the first run that reports a highlight colour\n",
+    "        first_color = next(\n",
+    "            (color for color in map(get_highlight_color, paragraph.runs) if color),\n",
+    "            None,\n",
+    "        )\n",
+    "        collected.append((paragraph.text.strip(), first_color))\n",
+    "    return collected\n",
+    "\n",
+    "def parse_lines(lines):\n",
+    "    url_pattern = re.compile(r'https?://\\S+')\n",
+    "    youtube_pattern = re.compile(r'(https?://(?:www\\.)?youtube\\.com/watch\\?v=[\\w-]+|https?://(?:www\\.)?youtu\\.be/[\\w-]+)')\n",
+    "    data = []\n",
+    "    current_main_category = None\n",
+    "    current_job = None\n",
+    "    jobs = {}\n",
+    "\n",
+    "    for line, highlight_color in lines:\n",
+    "        youtube_match = youtube_pattern.search(line)\n",
+    "        if youtube_match:\n",
+    "            # Extract the YouTube URL\n",
+    "            youtube_url = youtube_match.group()\n",
+    "            video_id = re.search(r'(?:v=|youtu\\.be/)([\\w-]+)', youtube_url).group(1)\n",
+    "            \n",
+    "            # Store the video ID instead of the iframe HTML\n",
+    "            if current_job and current_job in jobs:\n",
+    "                jobs[current_job]['videos'].append({\n",
+    "                    'video_id': video_id,  # Store only the video ID\n",
+    "                    'url': youtube_url      # Optionally store the original URL\n",
+    "                })\n",
+    "        elif url_pattern.search(line):\n",
+    "            # Extract the URL\n",
+    "            url_match = url_pattern.search(line)\n",
+    "            url = url_match.group()\n",
+    "            \n",
+    "            # Extract the category (everything before the URL)\n",
+    "            category = line[:url_match.start()].strip()\n",
+    "\n",
+    "            if current_job and current_job in jobs:\n",
+    "                jobs[current_job]['links'].append({\n",
+    "                    'url': url,\n",
+    "                    'category': category\n",
+    "                })\n",
+    "        elif line.lower().startswith(\"undefined\"):\n",
+    "            # Skip lines starting with \"undefined\"\n",
+    "            continue\n",
+    "        else:\n",
+    "            if current_main_category is None:\n",
+    "                # Set the main category\n",
+    "                current_main_category = line\n",
+    "            elif current_job is None:\n",
+    "                # Set the job title (sub_category)\n",
+    "                current_job = line\n",
+    "                jobs[current_job] = {\n",
+    "                    'links': [],\n",
+    "                    'videos': [],\n",
+    "                    'degree_required': \"Bachelor's\" if highlight_color == 'cyan' else \"Graduate's\" if highlight_color == 'green' else ''\n",
+    "                }\n",
+    "            else:\n",
+    "                # Handle a new main category if a new line appears\n",
+    "                if line.strip() == \"\":\n",
+    "                    if current_main_category:\n",
+    "                        # Save the current main category and its jobs\n",
+    "                        data.append({\n",
+    "                            'main_category': current_main_category,\n",
+    "                            'jobs': jobs\n",
+    "                        })\n",
+    "                        # Reset for the next main category\n",
+    "                        current_main_category = None\n",
+    "                        jobs = {}\n",
+    "                        current_job = None\n",
+    "                else:\n",
+    "                    # If it's neither a URL nor an empty line, it might be a new job\n",
+    "                    if current_job:\n",
+    "                        # Make sure to add the current job to jobs before changing\n",
+    "                        current_job = line\n",
+    "                        jobs[current_job] = {\n",
+    "                            'links': [],\n",
+    "                            'videos': [],\n",
+    "                            'degree_required': \"Bachelor's\" if highlight_color == 'cyan' else \"Graduate's\" if highlight_color == 'green' else ''\n",
+    "                        }\n",
+    "\n",
+    "    # Handle the last main category and jobs if they exist\n",
+    "    if current_main_category and jobs:\n",
+    "        data.append({\n",
+    "            'main_category': current_main_category,\n",
+    "            'jobs': jobs\n",
+    "        })\n",
+    "\n",
+    "    return data\n",
+    "\n",
+    "def scrape_quick_facts(driver, url):\n",
+    "    \"\"\"Load url in the given driver and return its Quick Facts table as tidied HTML.\n",
+    "\n",
+    "    The element with id 'quickfacts' is copied, its links are replaced by their\n",
+    "    text, the rows listed in rows_to_remove are dropped, the first row is\n",
+    "    replaced by a single title cell, and a 'Source:' row linking back to url\n",
+    "    is appended.\n",
+    "\n",
+    "    Args:\n",
+    "        driver: WebDriver used to load the page; it is not closed here.\n",
+    "        url: Address of the page to scrape.\n",
+    "\n",
+    "    Returns:\n",
+    "        The modified table HTML as a string, or None when loading or\n",
+    "        processing failed (the exception is printed, not raised).\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        # Navigation is inside the try so a page that fails to load yields\n",
+    "        # None like every other failure instead of aborting the caller\n",
+    "        driver.get(url)\n",
+    "        time.sleep(5)  # Fixed wait for the page to load\n",
+    "\n",
+    "        # Copy the table out of the live page and parse it separately\n",
+    "        quickfacts_table = driver.find_element(By.ID, 'quickfacts')\n",
+    "        quickfacts_html = quickfacts_table.get_attribute('outerHTML')\n",
+    "        soup = BeautifulSoup(quickfacts_html, 'html.parser')\n",
+    "\n",
+    "        # Replace every link with its plain text\n",
+    "        for link in soup.find_all('a'):\n",
+    "            link.unwrap()\n",
+    "\n",
+    "        # Drop the rows whose header cell matches one of these labels\n",
+    "        rows_to_remove = [\"Work Experience in a Related Occupation\", \"On-the-job Training\"]\n",
+    "        for row in soup.find_all('tr'):\n",
+    "            header = row.find('th')\n",
+    "            if header and header.get_text(strip=True) in rows_to_remove:\n",
+    "                row.decompose()\n",
+    "\n",
+    "        # Replace the first row with a single title cell spanning both columns\n",
+    "        first_row = soup.find('tr')\n",
+    "        first_cell = soup.new_tag('th')\n",
+    "        first_cell['colspan'] = '2'\n",
+    "        first_cell.string = 'Quick Facts Table'\n",
+    "        first_row.clear()\n",
+    "        first_row.append(first_cell)\n",
+    "\n",
+    "        # Build the closing row that links back to the scraped page\n",
+    "        source_row = soup.new_tag('tr')\n",
+    "        source_header = soup.new_tag('th')\n",
+    "        source_header['colspan'] = '2'\n",
+    "        source_header.string = 'Source: '\n",
+    "        source_link = soup.new_tag('a', href=url)\n",
+    "        source_link.string = 'www.bls.gov'\n",
+    "        source_header.append(source_link)\n",
+    "        source_row.append(source_header)\n",
+    "\n",
+    "        # Fall back to the table (or the fragment root) when there is no <tbody>\n",
+    "        container = soup.find('tbody') or soup.find('table') or soup\n",
+    "        container.append(source_row)\n",
+    "\n",
+    "        return str(soup)\n",
+    "\n",
+    "    except Exception as e:\n",
+    "        print(\"Error: Unable to find the Quick Facts table. Exception:\", e)\n",
+    "        return None\n",
+    "\n",
+    "def main(file_path, start_line, end_line, output_file):\n",
+    "    \"\"\"Read job categories from a Word document, attach BLS Quick Facts tables, and save them as JSON.\n",
+    "\n",
+    "    Args:\n",
+    "        file_path, start_line, end_line: Forwarded unchanged to read_docx().\n",
+    "        output_file: Path of the JSON file; it is rewritten after every job category.\n",
+    "    \"\"\"\n",
+    "    lines = read_docx(file_path, start_line, end_line)\n",
+    "    job_data = parse_lines(lines)  # Parse the job data from the Word document\n",
+    "    combined_data = []\n",
+    "\n",
+    "    # Set up the Edge WebDriver\n",
+    "    options = webdriver.EdgeOptions()\n",
+    "    driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()), options=options)\n",
+    "\n",
+    "    # Nested function to process every job in a category and scrape Quick Facts\n",
+    "    def process_job(job_category):\n",
+    "        jobs = job_category['jobs']\n",
+    "        for job_title, job_info in jobs.items():\n",
+    "            print(f\"Processing job: {job_title} in category: {job_category['main_category']}\")\n",
+    "\n",
+    "            # Check if any of the job links is from bls.gov\n",
+    "            bls_links = [link['url'] for link in job_info['links'] if 'bls.gov' in link['url']]\n",
+    "            if not bls_links:\n",
+    "                continue  # Nothing to scrape for this job\n",
+    "\n",
+    "            # Scrape the Quick Facts table from the first bls.gov link found\n",
+    "            html_table = scrape_quick_facts(driver, bls_links[0])\n",
+    "            if html_table:\n",
+    "                job_info['jobs_table'] = html_table  # Add the HTML table to the job info\n",
+    "\n",
+    "        # Return only after ALL jobs were visited; returning inside the loop\n",
+    "        # stopped after the first job and yielded None for an empty category\n",
+    "        return job_category\n",
+    "\n",
+    "    try:\n",
+    "        # Loop through each job category\n",
+    "        for job_category in job_data:\n",
+    "            combined_data.append(process_job(job_category))  # Process jobs and collect the category\n",
+    "\n",
+    "            # Save the combined data to a JSON file incrementally so progress survives a crash\n",
+    "            with open(output_file, 'w') as f:\n",
+    "                json.dump(combined_data, f, indent=4)\n",
+    "            print(f\"Progress saved: {len(combined_data)} job categories processed.\")\n",
+    "    finally:\n",
+    "        driver.quit()  # Always close the browser, even if scraping or saving fails\n",
+    "\n",
+    "# Specify the file path and line range\n",
+    "file_path = 'careers.docx'\n",
+    "start_line = 171  # Starting line (inclusive)\n",
+    "end_line = start_line + 3000  # Adjust the ending line as needed\n",
+    "output_file = 'jobs.json'\n",
+    "\n",
+    "main(file_path, start_line, end_line, output_file)\n"
+   ]
   }
  ],
  "metadata": {