From 31555c84da76d9e145433b0159746bae395c146c Mon Sep 17 00:00:00 2001
From: Nicolas Oliver
Date: Mon, 22 May 2023 18:54:54 -0700
Subject: [PATCH] updates

* add function to get specs for sku
* update gitignore to ignore program output
* add some metrics to README
---
 .gitignore |  1 +
 README.md  |  9 +++++++
 index.js   | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 85 insertions(+)
 create mode 100644 README.md

diff --git a/.gitignore b/.gitignore
index 3c3629e..ed2f5c8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 node_modules
+db.*
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..fff33f6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,9 @@
+# Intel Ark Scrapper
+
+```
+time node index.js 2>&1
+
+real 262m20.010s
+user 0m0.295s
+sys 0m0.983s
+```
diff --git a/index.js b/index.js
index ee5f803..be786ee 100644
--- a/index.js
+++ b/index.js
@@ -112,6 +112,61 @@ async function getListOfSKUsForSubProduct(driver) {
   return result;
 }
 
+/**
+ * Collects the specification tables of the SKU page currently loaded in
+ * `driver`.
+ *
+ * Every `<section class="blade specs-blade specifications">` becomes one key
+ * of the result (the trimmed `innerText` of its `<h2>`), mapped to an object
+ * of label -> value pairs read from that section's `ul.specs-list` rows.
+ *
+ * @param {object} driver - WebDriver instance already navigated to a SKU page.
+ * @returns {Promise<Object>} Specs grouped by section title.
+ */
+async function getSpecsForSKU(driver) {
+  const sections = await driver.findElements(
+    By.xpath('//section[@class="blade specs-blade specifications"]')
+  );
+
+  const result = {};
+
+  for (const section of sections) {
+    // Search relative to the section element. A positional predicate such as
+    // `//section[...][n]` counts siblings per parent rather than matches in
+    // document order, so it can address the wrong section.
+    const specTitle = (
+      await section.findElement(By.xpath(".//h2")).getAttribute("innerText")
+    ).trim();
+
+    const sectionSpecs = {};
+    const rows = await section.findElements(
+      By.xpath('.//ul[@class="specs-list"]//li')
+    );
+
+    for (const row of rows) {
+      // Pair labels and values inside the same <li>, so a row missing one of
+      // the spans cannot shift (or crash) the pairing of the rows after it.
+      const labels = await row.findElements(
+        By.xpath('.//span[@class="label"]')
+      );
+      const values = await row.findElements(
+        By.xpath('.//span[@class="value"]')
+      );
+      const pairCount = Math.min(labels.length, values.length);
+
+      for (let j = 0; j < pairCount; j++) {
+        const specLabel = (await labels[j].getAttribute("innerText")).trim();
+        const specValue = (await values[j].getAttribute("innerText")).trim();
+        sectionSpecs[specLabel] = specValue;
+      }
+    }
+
+    result[specTitle] = sectionSpecs;
+  }
+
+  return result;
+}
+
 async function main() {
   await driver.get("https://ark.intel.com/");
 
@@ -146,6 +201,26 @@ async function main() {
     }
   }
 
+  for (let i in categoryList) {
+    let category = categoryList[i];
+    for (let j in category.products) {
+      let product = category.products[j];
+      for (let k in product.subproducts) {
+        let subproduct = product.subproducts[k];
+        for (let l in subproduct.skus) {
+          let sku = subproduct.skus[l];
+          console.log(sku.Url);
+          await driver.get(sku.Url);
+          try {
+            sku.specs = await getSpecsForSKU(driver);
+          } catch (e) {
+            console.error(e);
+          }
+        }
+      }
+    }
+  }
+
   await fs.promises.writeFile("db.json", JSON.stringify(categoryList), "utf-8");
 }
 