diff --git a/package.json b/package.json
index e8c612e..2781be4 100644
--- a/package.json
+++ b/package.json
@@ -1,5 +1,5 @@
 {
-  "version": "8.0.15",
+  "version": "8.0.16",
   "name": "@extractus/article-extractor",
   "description": "To extract main article from given URL",
   "homepage": "https://github.com/extractus/article-extractor",
@@ -38,8 +38,8 @@
   },
   "devDependencies": {
     "@types/sanitize-html": "^2.13.0",
-    "eslint": "^9.13.0",
-    "globals": "^15.11.0",
+    "eslint": "^9.14.0",
+    "globals": "^15.12.0",
     "https-proxy-agent": "^7.0.5",
     "nock": "^13.5.5"
   },
diff --git a/src/utils/extractLdSchema.js b/src/utils/extractLdSchema.js
index 6a05c0f..f296e7d 100644
--- a/src/utils/extractLdSchema.js
+++ b/src/utils/extractLdSchema.js
@@ -47,6 +47,23 @@ const parseJson = (text) => {
   }
 }
 
+/**
+ * Checks whether a parsed JSON-LD object declares at least one supported type.
+ * The root `@type` may be a single value or an array of values; each string
+ * value is lower-cased before being looked up in `typeSchemas`.
+ *
+ * @param {Object} ldJson - parsed JSON-LD object
+ * @returns {boolean} true if at least one declared type is in `typeSchemas`
+ */
+const isAllowedLdJsonType = (ldJson) => {
+  const rootLdJsonType = ldJson['@type'] || ''
+  const arr = isArray(rootLdJsonType) ? rootLdJsonType : [rootLdJsonType]
+  // JSON-LD comes from arbitrary web pages: keep only non-empty strings so
+  // that `toLowerCase()` cannot throw on numbers, objects or nested arrays
+  const ldJsonTypes = arr.filter(x => typeof x === 'string' && x.length > 0)
+  return ldJsonTypes.some(x => typeSchemas.includes(x.toLowerCase()))
+}
+
 /**
  * Parses JSON-LD data from a document and populates an entry object.
  * Only populates if the original entry object is empty or undefined.
@@ -59,10 +76,7 @@ export default (document, entry) => {
   const ldSchemas = document.querySelectorAll('script[type="application/ld+json"]')
   ldSchemas.forEach(ldSchema => {
     const ldJson = parseJson(ldSchema.textContent.replace(/[\n\r\t]/g, ''))
-    const ldJsonType = ldJson['@type']?.toLowerCase() || null
-    const isAllowedLdJsonType = ldJsonType ? typeSchemas.includes(ldJsonType) : false
-
-    if (ldJson && isAllowedLdJsonType) {
+    if (ldJson && isAllowedLdJsonType(ldJson)) {
       Object.entries(attributeLists).forEach(([key, attr]) => {
         if (!entry[key] || !ldJson[attr]) {
           return