diff --git a/.gitignore b/.gitignore
index ed2f5c8..1275d8b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 node_modules
-db.*
\ No newline at end of file
+db.json
+db.*.json
+schema.*.json
diff --git a/README.md b/README.md
index fff33f6..ca414e1 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,47 @@
 # Intel Ark Scrapper
 
-```
+## Scrapper Execution
+
+```bash
 time node index.js 2>&1
 
 real 262m20.010s
 user 0m0.295s
 sys 0m0.983s
 ```
+
+## Schema Induction
+
+```bash
+node schema.js
+```
+
+## Product Types
+
+```bash
+jq '.[].name' db.json
+```
+
+## Product Brands
+
+```bash
+jq '.[0].products[].name' db.json
+```
+
+## Product Series
+
+```bash
+jq '.[0].products[0].subproducts[].name' db.json
+```
+
+## Products
+
+```bash
+jq '.[0].products[0].subproducts[0].skus[]."Product Name"' db.json
+```
+
+## Specification Categories
+
+```bash
+jq '.[0].products[0].subproducts[0].skus[0].specs | keys' db.json
+```
diff --git a/erd.png b/erd.png
new file mode 100644
index 0000000..3375ec2
Binary files /dev/null and b/erd.png differ
diff --git a/package-lock.json b/package-lock.json
index 8d80ab6..5816bb9 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,6 +9,7 @@
       "version": "1.0.0",
       "license": "ISC",
       "dependencies": {
+        "genson-js": "^0.0.8",
         "prettier": "^2.8.8",
         "selenium-webdriver": "^4.9.2"
       },
@@ -558,6 +559,11 @@
       "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
       "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw=="
     },
+    "node_modules/genson-js": {
+      "version": "0.0.8",
+      "resolved": "https://registry.npmjs.org/genson-js/-/genson-js-0.0.8.tgz",
+      "integrity": "sha512-4NUusDTwF+lzYh72uKV+Uvpky9iPO+YDIMpGImA5pbHfLV9HwgRCA4hYjGu78V4J4Cx2IZRTFfRERn9aUs74mw=="
+    },
     "node_modules/glob": {
       "version": "7.2.3",
       "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
@@ -1655,6 +1661,11 @@
       "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
       "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw=="
     },
+    "genson-js": {
+      "version": "0.0.8",
+      "resolved": "https://registry.npmjs.org/genson-js/-/genson-js-0.0.8.tgz",
+      "integrity": "sha512-4NUusDTwF+lzYh72uKV+Uvpky9iPO+YDIMpGImA5pbHfLV9HwgRCA4hYjGu78V4J4Cx2IZRTFfRERn9aUs74mw=="
+    },
     "glob": {
       "version": "7.2.3",
       "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
diff --git a/package.json b/package.json
index 52fb00e..1d43e7e 100644
--- a/package.json
+++ b/package.json
@@ -9,6 +9,7 @@
   "author": "",
   "license": "ISC",
   "dependencies": {
+    "genson-js": "^0.0.8",
     "prettier": "^2.8.8",
     "selenium-webdriver": "^4.9.2"
   },
diff --git a/schema.js b/schema.js
new file mode 100644
--- /dev/null
+++ b/schema.js
@@ -0,0 +1,56 @@
+const { createSchema } = require('genson-js');
+const fs = require("fs");
+
+/**
+ * Output-file slug for each top-level entry of db.json, in the order the
+ * scraper stores the product types: `db[i]` is written to
+ * `schema.<SCHEMA_NAMES[i]>.json`.
+ */
+const SCHEMA_NAMES = [
+  "processor", // Processors
+  "serverproducts", // Server Products
+  "intelnucs", // Intel NUCs
+  "wireless", // Wireless
+  "ethernetproducts", // Ethernet Products
+  "intelfpgas", // Intel® FPGAs
+  "memoryandstorage", // Memory and Storage
+  "chipsets", // Chipsets
+  "graphics", // Graphics
+];
+
+/**
+ * Reads db.json, infers one JSON schema per product type with genson-js and
+ * writes each schema, tab-indented, to its own schema.<name>.json file.
+ *
+ * @returns {Promise<void>} Resolves once every schema file has been written.
+ * @throws {Error} If db.json cannot be read or parsed, or does not contain
+ *   one top-level entry per name in SCHEMA_NAMES.
+ */
+async function main() {
+  const data = await fs.promises.readFile("db.json", "utf-8");
+  const db = JSON.parse(data);
+
+  // The schemas are matched to product types purely by position, so a short
+  // or non-array db.json would silently produce mislabeled schema files.
+  if (!Array.isArray(db) || db.length < SCHEMA_NAMES.length) {
+    throw new Error(
+      `db.json must be an array with at least ${SCHEMA_NAMES.length} product types`
+    );
+  }
+
+  await Promise.all(
+    SCHEMA_NAMES.map((name, index) =>
+      fs.promises.writeFile(
+        `schema.${name}.json`,
+        JSON.stringify(createSchema(db[index]), null, "\t"),
+        "utf-8"
+      )
+    )
+  );
+}
+
+// Report any failure and exit non-zero instead of leaving an unhandled rejection.
+main().catch((err) => {
+  console.error(err);
+  process.exitCode = 1;
+});
diff --git a/schema.sql b/schema.sql
new file mode 100644
--- /dev/null
+++ b/schema.sql
@@ -0,0 +1,93 @@
+-- Reset: drop existing tables, children first so no foreign key still
+-- references a table when it is dropped. Makes the script re-runnable.
+DROP TABLE IF EXISTS ProductsSpecifications;
+DROP TABLE IF EXISTS SpecificationProperties;
+DROP TABLE IF EXISTS SpecificationCategories;
+DROP TABLE IF EXISTS Products;
+DROP TABLE IF EXISTS ProductSeries;
+DROP TABLE IF EXISTS ProductBrands;
+DROP TABLE IF EXISTS ProductTypes;
+
+-- Product types: the top-level entries of db.json.
+CREATE TABLE ProductTypes (
+    Id INT NOT NULL,
+    Name TEXT NOT NULL,
+
+    CONSTRAINT PK_ProductTypes PRIMARY KEY (Id),
+    CONSTRAINT UQ_ProductTypes_Name UNIQUE (Name)
+);
+
+-- Product brands: the "products" of a product type in db.json.
+CREATE TABLE ProductBrands (
+    ProductType INT NOT NULL,
+    Id INT NOT NULL,
+    Name TEXT NOT NULL,
+
+    CONSTRAINT PK_ProductBrands PRIMARY KEY (ProductType, Id),
+    CONSTRAINT FK_ProductBrands_ProductTypes FOREIGN KEY (ProductType) REFERENCES ProductTypes (Id),
+    CONSTRAINT UQ_ProductBrands_Name UNIQUE (Name)
+);
+
+-- Product series: the "subproducts" of a brand in db.json.
+CREATE TABLE ProductSeries (
+    ProductType INT NOT NULL,
+    ProductBrand INT NOT NULL,
+    Id INT NOT NULL,
+    Name TEXT NOT NULL,
+    Url TEXT NOT NULL,
+
+    CONSTRAINT PK_ProductSeries PRIMARY KEY (ProductType, ProductBrand, Id),
+    CONSTRAINT FK_ProductSeries_ProductBrands FOREIGN KEY (ProductType, ProductBrand) REFERENCES ProductBrands (ProductType, Id),
+    CONSTRAINT UQ_ProductSeries_Name UNIQUE (Name),
+    CONSTRAINT UQ_ProductSeries_Url UNIQUE (Url)
+);
+
+-- Products: the "skus" of a series in db.json.
+CREATE TABLE Products (
+    ProductType INT NOT NULL,
+    ProductBrand INT NOT NULL,
+    ProductSerie INT NOT NULL,
+    Id INT NOT NULL,
+    Name TEXT NOT NULL,
+    Url TEXT NOT NULL,
+
+    CONSTRAINT PK_Product PRIMARY KEY (ProductType, ProductBrand, ProductSerie, Id),
+    CONSTRAINT FK_Product_ProductSeries FOREIGN KEY (ProductType, ProductBrand, ProductSerie) REFERENCES ProductSeries (ProductType, ProductBrand, Id),
+    CONSTRAINT UQ_Product_Name UNIQUE (Name),
+    CONSTRAINT UQ_Product_Url UNIQUE (Url)
+);
+
+-- Specification categories: the keys of a product's "specs" in db.json.
+CREATE TABLE SpecificationCategories (
+    Id INT NOT NULL,
+    Name TEXT NOT NULL,
+
+    CONSTRAINT PK_SpecificationCategories PRIMARY KEY (Id),
+    CONSTRAINT UQ_SpecificationCategories_Name UNIQUE (Name)
+);
+
+-- Named properties belonging to one specification category.
+CREATE TABLE SpecificationProperties (
+    SpecificationCategory INT NOT NULL,
+    Id INT NOT NULL,
+    Name TEXT NOT NULL,
+
+    CONSTRAINT PK_SpecificationProperties PRIMARY KEY (SpecificationCategory, Id),
+    CONSTRAINT FK_SpecificationProperties_SpecificationCategories FOREIGN KEY (SpecificationCategory) REFERENCES SpecificationCategories (Id),
+    CONSTRAINT UQ_SpecificationProperties_Name UNIQUE (Name)
+);
+
+-- Value of one specification property for one product.
+CREATE TABLE ProductsSpecifications (
+    ProductType INT NOT NULL,
+    ProductBrand INT NOT NULL,
+    ProductSerie INT NOT NULL,
+    ProductId INT NOT NULL,
+    SpecificationCategory INT NOT NULL,
+    SpecificationProperty INT NOT NULL,
+    Value TEXT NOT NULL,
+
+    CONSTRAINT PK_ProductsSpecifications PRIMARY KEY (ProductType, ProductBrand, ProductSerie, ProductId, SpecificationCategory, SpecificationProperty),
+    CONSTRAINT FK_ProductsSpecifications_Products FOREIGN KEY (ProductType, ProductBrand, ProductSerie, ProductId) REFERENCES Products (ProductType, ProductBrand, ProductSerie, Id),
+    CONSTRAINT FK_ProductsSpecifications_SpecificationProperties FOREIGN KEY (SpecificationCategory, SpecificationProperty) REFERENCES SpecificationProperties (SpecificationCategory, Id)
+);