diff --git a/README.md b/README.md index ebf8410..22165fb 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,6 @@ yarn add @extractus/oembed-extractor ``` ```ts -// es6 module import { extract } from '@extractus/oembed-extractor' const result = await extract('https://www.youtube.com/watch?v=x2bqscVkGxk') @@ -39,10 +38,6 @@ console.log(result) ### Deno ```ts -// deno < 1.28 -import { extract } from 'https://esm.sh/@extractus/oembed-extractor' - -// deno > 1.28 import { extract } from 'npm:@extractus/oembed-extractor' ``` diff --git a/deno.json b/deno.json deleted file mode 100644 index a6c408f..0000000 --- a/deno.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "imports": { - "cross-fetch": "./src/deno/cross-fetch.js" - } -} diff --git a/examples/browser-oembed-parser/package.json b/examples/browser-oembed-parser/package.json index a189f4e..d4844d7 100644 --- a/examples/browser-oembed-parser/package.json +++ b/examples/browser-oembed-parser/package.json @@ -6,7 +6,7 @@ "start": "node server" }, "dependencies": { - "express": "^4.18.2", - "got": "^13.0.0" + "express": "latest", + "got": "latest" } } diff --git a/examples/bun-oembed-parser/package.json b/examples/bun-oembed-parser/package.json index 6a70e3c..9ed8b15 100644 --- a/examples/bun-oembed-parser/package.json +++ b/examples/bun-oembed-parser/package.json @@ -5,10 +5,10 @@ "start": "bun run index.ts" }, "devDependencies": { - "bun-types": "^0.6.13" + "bun-types": "latest" }, "dependencies": { - "hono": "^3.2.7", + "hono": "latest", "@extractus/oembed-extractor": "latest" } } diff --git a/examples/deno-oembed-parser/index.ts b/examples/deno-oembed-parser/index.ts index 2b1ec52..51c4497 100644 --- a/examples/deno-oembed-parser/index.ts +++ b/examples/deno-oembed-parser/index.ts @@ -1,6 +1,6 @@ import { serve } from 'https://deno.land/std/http/server.ts' -import { Hono } from 'https://deno.land/x/hono@v3.2.7/mod.ts' +import { Hono } from 'https://deno.land/x/hono/mod.ts' import { extract } from 'npm:@extractus/oembed-extractor' diff --git a/examples/node-oembed-parser/package.json b/examples/node-oembed-parser/package.json index 70bd9be..5592617 100644 --- a/examples/node-oembed-parser/package.json +++ b/examples/node-oembed-parser/package.json @@ -7,7 +7,7 @@ "start": "node index.js" }, "dependencies": { - "express": "^4.18.2", + "express": "latest", "@extractus/oembed-extractor": "latest" } } diff --git a/examples/tsnode-oembed-parser/package.json b/examples/tsnode-oembed-parser/package.json index 99280ef..c3128b4 100644 --- a/examples/tsnode-oembed-parser/package.json +++ b/examples/tsnode-oembed-parser/package.json @@ -7,10 +7,10 @@ "start": "node dist/index.js" }, "devDependencies": { - "typescript": "^5.1.6" + "typescript": "latest" }, "dependencies": { - "express": "^4.18.2", + "express": "latest", "@extractus/oembed-extractor": "latest" } } diff --git a/package.json b/package.json index 855f357..5f4cf07 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { - "version": "4.0.4", + "version": "4.0.5", "name": "@extractus/oembed-extractor", "description": "Get oEmbed data from given URL.", "homepage": "https://github.com/extractus/oembed-extractor", @@ -11,10 +11,12 @@ "main": "./src/main.js", "type": "module", "imports": { - "cross-fetch": "./src/deno/cross-fetch.js" + "cross-fetch": "./src/deno/cross-fetch.js", + "linkedom": "https://deno.land/x/deno_dom@v0.1.45/deno-dom-wasm.ts" }, "browser": { - "cross-fetch": "./src/deno/cross-fetch.js" + "cross-fetch": "./src/deno/cross-fetch.js", + "linkedom": "./src/browser/linkedom.js" }, "types": "./index.d.ts", "engines": { @@ -30,11 +32,12 @@ "reset": "node reset" }, "dependencies": { - "cross-fetch": "^4.0.0" + "cross-fetch": "^4.0.0", + "linkedom": "^0.16.11" }, "devDependencies": { - "eslint": "^9.1.1", - "globals": "^15.0.0", + "eslint": "^9.2.0", + "globals": "^15.1.0", "https-proxy-agent": "^7.0.4", "jest": "^29.7.0", "nock": "^13.5.4" diff --git a/src/browser/linkedom.js b/src/browser/linkedom.js new file mode 100644 index 0000000..6d5be04 --- /dev/null +++ b/src/browser/linkedom.js @@ -0,0 +1 @@ +export const DOMParser = window.DOMParser diff --git a/src/main.js b/src/main.js index 2c3c065..a9dbe78 100644 --- a/src/main.js +++ b/src/main.js @@ -1,6 +1,7 @@ // main.js import { isValid as isValidURL } from './utils/linker.js' +import extractWithDiscovery from './utils/autoDiscovery.js' import fetchEmbed from './utils/fetchEmbed.js' import { getEndpoint } from './utils/provider.js' @@ -10,12 +11,10 @@ export const extract = async (url, params = {}, options = {}) => { throw new Error('Invalid input URL') } const endpoint = getEndpoint(url) - if (!endpoint) { - throw new Error(`No provider found with given url "${url}"`) - } - const data = await fetchEmbed(url, params, endpoint, options) - return data + return endpoint + ? fetchEmbed(url, params, endpoint, options) + : extractWithDiscovery(url, params, options) } export { diff --git a/src/utils/autoDiscovery.js b/src/utils/autoDiscovery.js new file mode 100644 index 0000000..c080292 --- /dev/null +++ b/src/utils/autoDiscovery.js @@ -0,0 +1,23 @@ +// utils -> autoDiscovery.js + +import { DOMParser } from 'linkedom' + +import { getHtml, getJson } from './retrieve.js' + +export default async (url, params = {}, options = {}) => { + const html = await getHtml(url, options) + const doc = new DOMParser().parseFromString(html, 'text/html') + const elm = doc.querySelector('link[type="application/json+oembed"]') + const href = elm.getAttribute('href') + const q = new URL(href) + const { origin, pathname, searchParams } = q + Object.keys(params).forEach((key) => { + if (!searchParams.has(key)) { + searchParams.append(key, params[key]) + } + }) + const link = `${origin}${pathname}?${searchParams.toString()}` + const body = await getJson(link, options) + body.method = 'auto-discovery' + return body +} diff --git a/src/utils/autoDiscovery.test.js b/src/utils/autoDiscovery.test.js new file mode 100644 index 0000000..701ae69 --- /dev/null +++ b/src/utils/autoDiscovery.test.js @@ -0,0 +1,53 @@ +// autoDiscovery.test +/* eslint-env jest */ + +import nock from 'nock' + +import autoDiscovery from './autoDiscovery.js' + +const parseUrl = (url) => { + const re = new URL(url) + return { + baseUrl: `${re.protocol}//${re.host}`, + path: re.pathname, + } +} + +describe('test if autoDiscovery() works correctly', () => { + const url = 'https://www.bitchute.com/video/8hXWnkvA8Ao/' + test(`check fetchEmbed("${url}")`, async () => { + const htmlFile = './test-data/bitchute.html' + const jsonFile = './test-data/bitchute.json' + + const { baseUrl, path } = parseUrl(url) + const scope = nock(baseUrl) + scope.get(path) + .replyWithFile(200, htmlFile, { + 'Content-Type': 'text/html', + }) + + const endpoint = 'https://www.bitchute.com/oembed/' + const { baseUrl: endpointBaseUrl, path: endpointPath } = parseUrl(endpoint) + + const params = { + maxwidth: 600, + maxheight: 400, + } + + const jsonScope = nock(endpointBaseUrl, { encodedQueryParams: true }) + const queries = new URLSearchParams({ + url: 'https://www.bitchute.com/video/8hXWnkvA8Ao/', + format: 'json', + ...params, + }) + jsonScope.get(endpointPath) + .query(queries) + .replyWithFile(200, jsonFile, { + 'Content-Type': 'application/json', + }) + + const result = await autoDiscovery(url, params) + expect(result).toBeTruthy() + nock.cleanAll() + }) +}) diff --git a/src/utils/fetchEmbed.js b/src/utils/fetchEmbed.js index b8ce126..d915db7 100644 --- a/src/utils/fetchEmbed.js +++ b/src/utils/fetchEmbed.js @@ -1,6 +1,6 @@ // utils -> fetchEmbed -import retrieve from './retrieve.js' +import { getJson } from './retrieve.js' import { getDomain } from './linker.js' const isFacebookGraphDependent = (url) => { @@ -34,6 +34,7 @@ export default async (url, params = {}, endpoint = '', options = {}) => { // esl const queryParams = new URLSearchParams(query).toString() const link = endpoint + '?' + queryParams - const body = retrieve(link, options) + const body = await getJson(link, options) + body.method = 'provider-api' return body } diff --git a/src/utils/providers.latest.js b/src/utils/providers.latest.js index a04d9ae..3af32dd 100644 --- a/src/utils/providers.latest.js +++ b/src/utils/providers.latest.js @@ -1,4 +1,4 @@ -// provider data, synchronized at 2024-04-26T08:46:02.055Z +// provider data, synchronized at 2024-05-07T10:41:10.221Z /* eslint-disable */ @@ -198,6 +198,12 @@ export const providers = [ ], "e": "www.behance.net/services/oembed" }, + { + "s": [ + "cloud\\.biqapp\\.com/*" + ], + "e": "biqapp.com/api/v1/video/oembed" + }, { "s": [ "blackfire\\.io/profiles/*/graph", @@ -277,6 +283,12 @@ export const providers = [ ], "e": "img.catbo.at/oembed.json" }, + { + "s": [ + "embeds\\.celero\\.io/*" + ], + "e": "api.celero.io/api/oembed" + }, { "s": [ "view\\.ceros\\.com/*" @@ -816,6 +828,12 @@ export const providers = [ ], "e": "www.hulu.com/api/oembed.json" }, + { + "s": [ + "oembed\\.ideamapper\\.com/*" + ], + "e": "oembed.ideamapper.com" + }, { "s": [ "*\\.idomoo\\.com/*" diff --git a/src/utils/providers.orginal.json b/src/utils/providers.orginal.json index 576b00c..886c7d0 100644 --- a/src/utils/providers.orginal.json +++ b/src/utils/providers.orginal.json @@ -415,6 +415,19 @@ } ] }, + { + "provider_name": "biqnetwork", + "provider_url": "https://biqapp.com/", + "endpoints": [ + { + "schemes": [ + "https://cloud.biqapp.com/*" + ], + "url": "https://biqapp.com/api/v1/video/oembed", + "discovery": true + } + ] + }, { "provider_name": "Blackfire.io", "provider_url": "https://blackfire.io", @@ -591,6 +604,22 @@ } ] }, + { + "provider_name": "Celero", + "provider_url": "https://www.celero.io", + "endpoints": [ + { + "schemes": [ + "https://embeds.celero.io/*" + ], + "url": "https://api.celero.io/api/oembed", + "discovery": true, + "formats": [ + "json" + ] + } + ] + }, { "provider_name": "Ceros", "provider_url": "http://www.ceros.com/", @@ -1698,6 +1727,19 @@ } ] }, + { + "provider_name": "Ideamapper", + "provider_url": "http://oembed.ideamapper.com", + "endpoints": [ + { + "schemes": [ + "http://oembed.ideamapper.com/*" + ], + "url": "http://oembed.ideamapper.com", + "discovery": true + } + ] + }, { "provider_name": "Idomoo", "provider_url": "https://idomoo.com/", diff --git a/src/utils/providers.prev.js b/src/utils/providers.prev.js index 96afe1c..a04d9ae 100644 --- a/src/utils/providers.prev.js +++ b/src/utils/providers.prev.js @@ -1,4 +1,4 @@ -// provider data, synchronized at 2024-03-29T04:36:42.975Z +// provider data, synchronized at 2024-04-26T08:46:02.055Z /* eslint-disable */ @@ -212,6 +212,12 @@ export const providers = [ ], "e": "blogcast.host/oembed" }, + { + "s": [ + "bsky\\.app/profile/*/post/*" + ], + "e": "embed.bsky.app/oembed" + }, { "s": [ "www\\.bookingmood\\.com/embed/*/*" @@ -1198,6 +1204,12 @@ export const providers = [ ], "e": "ndla.no/oembed" }, + { + "s": [ + "*\\.neetorecord\\.com/watch/*" + ], + "e": "api.neetorecord.com/api/v1/oembed" + }, { "s": [ "*\\.nfb\\.ca/film/*" @@ -1559,6 +1571,13 @@ export const providers = [ ], "e": "embed.sendtonews.com/services/oembed" }, + { + "s": [ + "shopshare\\.tv/shopboard/*", + "shopshare\\.tv/shopcast/*" + ], + "e": "shopshare.tv/api/shopcast/oembed" + }, { "s": [ "www\\.shortnote\\.jp/view/notes/*" @@ -1830,6 +1849,12 @@ export const providers = [ ], "e": "app-test.totango.com/oembed" }, + { + "s": [ + "trackspace\\.upitup\\.com/*" + ], + "e": "trackspace.upitup.com/oembed" + }, { "s": [ "trinitymedia\\.ai/player/*", diff --git a/src/utils/retrieve.js b/src/utils/retrieve.js index e987f63..e60f662 100644 --- a/src/utils/retrieve.js +++ b/src/utils/retrieve.js @@ -15,7 +15,28 @@ const profetch = async (url, options = {}) => { return res } -export default async (url, options = {}) => { +export const getHtml = async (url, options = {}) => { + const { + headers = { + 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0', + }, + proxy = null, + agent = null, + signal = null, + } = options + + const res = proxy ? await profetch(url, { proxy, signal }) : await fetch(url, { headers, agent, signal }) + + const status = res.status + if (status >= 400) { + throw new Error(`Request failed with error code ${status}`) + } + + const text = await res.text() + return text +} + +export const getJson = async (url, options = {}) => { const { headers = { 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0', diff --git a/src/utils/retrieve.test.js b/src/utils/retrieve.test.js index a927dc3..c67f671 100644 --- a/src/utils/retrieve.test.js +++ b/src/utils/retrieve.test.js @@ -3,7 +3,7 @@ import nock from 'nock' -import retrieve from './retrieve.js' +import { getJson } from './retrieve.js' const parseUrl = (url) => { const re = new URL(url) @@ -13,19 +13,19 @@ const parseUrl = (url) => { } } -describe('test retrieve() method', () => { - test('test retrieve from good source', async () => { +describe('test getJson() method', () => { + test('test getJson from good source', async () => { const url = 'https://some.where/good/source' const { baseUrl, path } = parseUrl(url) nock(baseUrl).get(path).reply(200, { data: { name: 'oembed-parser' } }, { 'Content-Type': 'application/json', }) - const result = await retrieve(url) + const result = await getJson(url) expect(result.data.name).toEqual('oembed-parser') nock.cleanAll() }) - test('test retrieve using proxy', async () => { + test('test getJson using proxy', async () => { const url = 'https://some.where/good/source-with-proxy' const { baseUrl, path } = parseUrl(url) nock(baseUrl).get(path).reply(200, { data: { name: 'oembed-parser' } }, { @@ -35,7 +35,7 @@ describe('test retrieve() method', () => { .get('/api/proxy?url=https%3A%2F%2Fsome.where%2Fgood%2Fsource-with-proxy') .reply(200, { data: { name: 'oembed-parser' } }) - const result = await retrieve(url, { + const result = await getJson(url, { proxy: { target: 'https://proxy-server.com/api/proxy?url=', }, @@ -44,14 +44,14 @@ describe('test retrieve() method', () => { nock.cleanAll() }) - test('test retrieve invalid json reponsse', async () => { + test('test getJson invalid json reponsse', async () => { const url = 'https://some.where/bad/source' const { baseUrl, path } = parseUrl(url) nock(baseUrl).get(path).reply(200, 'this is not json string', { 'Content-Type': 'application/json', }) try { - await retrieve(url) + await getJson(url) } catch (err) { expect(err).toBeTruthy() } diff --git a/sync.js b/sync.js index 0b951da..ddc2f51 100644 --- a/sync.js +++ b/sync.js @@ -5,7 +5,7 @@ import { writeFileSync } from 'node:fs' -import retrieve from './src/utils/retrieve.js' +import { getJson } from './src/utils/retrieve.js' import { simplify } from './src/utils/provider.js' const source = 'https://oembed.com/providers.json' @@ -23,7 +23,7 @@ const saveOriginal = (data, file) => { const sync = async () => { try { - const result = await retrieve(source) + const result = await getJson(source) saveOriginal(result, orginal) const arr = simplify(result) diff --git a/test-data/bitchute.html b/test-data/bitchute.html new file mode 100644 index 0000000..0f7b80a --- /dev/null +++ b/test-data/bitchute.html @@ -0,0 +1,74 @@ + + + + + 2023 Praemium Imperiale White House Program + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test-data/bitchute.json b/test-data/bitchute.json new file mode 100644 index 0000000..f5588ac --- /dev/null +++ b/test-data/bitchute.json @@ -0,0 +1 @@ +{"title": "2023 Praemium Imperiale White House Program", "provider_url": "https://www.bitchute.com/", "author_name": "TheWhiteHouse", "thumbnail_url": "https://static-3.bitchute.com/live/cover_images/zWsYVmCOu4JA/8hXWnkvA8Ao_320x180.jpg", "html": "", "author_url": "https://www.bitchute.com/channel/zWsYVmCOu4JA/", "height": "344", "thumbnail_height": "180", "version": "1.0", "type": "video", "thumbnail_width": "320", "provider_name": "BitChute", "width": "459"} \ No newline at end of file