From 3ff65cb9b8f7ed4d83d568713c5681c5789faa69 Mon Sep 17 00:00:00 2001 From: Quan Ming <116425066+wahpiangle@users.noreply.github.com> Date: Mon, 12 Aug 2024 19:11:59 +0800 Subject: [PATCH] community[patch]: Support for specifying api url for firecrawl document loader (#6488) * Support for specifying api url for firecrawl document loader * refactor: remove default apiUrl value * refactor: change comment * Fix --------- Co-authored-by: jacoblee93 --- libs/langchain-community/package.json | 2 +- .../src/document_loaders/web/firecrawl.ts | 18 ++++++++++++++++-- yarn.lock | 15 ++++++++++++++- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 46a8f3accbb5..2d8461513c59 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -82,7 +82,7 @@ "@langchain/scripts": "~0.0.20", "@langchain/standard-tests": "0.0.0", "@layerup/layerup-security": "^1.5.12", - "@mendable/firecrawl-js": "^0.0.13", + "@mendable/firecrawl-js": "^0.0.36", "@mlc-ai/web-llm": "0.2.46", "@mozilla/readability": "^0.4.4", "@neondatabase/serverless": "^0.9.1", diff --git a/libs/langchain-community/src/document_loaders/web/firecrawl.ts b/libs/langchain-community/src/document_loaders/web/firecrawl.ts index 4cb40924e424..05e9dad25857 100644 --- a/libs/langchain-community/src/document_loaders/web/firecrawl.ts +++ b/libs/langchain-community/src/document_loaders/web/firecrawl.ts @@ -18,6 +18,10 @@ interface FirecrawlLoaderParameters { */ apiKey?: string; + /** + * API URL for Firecrawl. + */ + apiUrl?: string; /** * Mode of operation. Can be either "crawl" or "scrape". If not provided, the default value is "crawl". 
*/ @@ -46,6 +50,8 @@ interface FirecrawlDocument { export class FireCrawlLoader extends BaseDocumentLoader { private apiKey: string; + private apiUrl?: string; + private url: string; private mode: "crawl" | "scrape"; @@ -56,6 +62,7 @@ export class FireCrawlLoader extends BaseDocumentLoader { super(); const { apiKey = getEnvironmentVariable("FIRECRAWL_API_KEY"), + apiUrl, url, mode = "crawl", params, @@ -67,18 +74,25 @@ export class FireCrawlLoader extends BaseDocumentLoader { } this.apiKey = apiKey; + this.apiUrl = apiUrl; this.url = url; this.mode = mode; this.params = params; } /** - * Loads the data from the Firecrawl. + * Loads data from Firecrawl. * @returns An array of Documents representing the retrieved data. * @throws An error if the data could not be loaded. */ public async load(): Promise { - const app = new FirecrawlApp({ apiKey: this.apiKey }); + const params: ConstructorParameters<typeof FirecrawlApp>[0] = { + apiKey: this.apiKey, + }; + if (this.apiUrl !== undefined) { + params.apiUrl = this.apiUrl; + } + const app = new FirecrawlApp(params); let firecrawlDocs: FirecrawlDocument[]; if (this.mode === "scrape") { diff --git a/yarn.lock b/yarn.lock index e3f188785f20..6712e4ff35b3 100644 --- a/yarn.lock +++ b/yarn.lock @@ -11185,7 +11185,7 @@ __metadata: "@langchain/scripts": ~0.0.20 "@langchain/standard-tests": 0.0.0 "@layerup/layerup-security": ^1.5.12 - "@mendable/firecrawl-js": ^0.0.13 + "@mendable/firecrawl-js": ^0.0.36 "@mlc-ai/web-llm": 0.2.46 "@mozilla/readability": ^0.4.4 "@neondatabase/serverless": ^0.9.1 @@ -12704,6 +12704,19 @@ __metadata: languageName: node linkType: hard +"@mendable/firecrawl-js@npm:^0.0.36": + version: 0.0.36 + resolution: "@mendable/firecrawl-js@npm:0.0.36" + dependencies: + axios: ^1.6.8 + dotenv: ^16.4.5 + uuid: ^9.0.1 + zod: ^3.23.8 + zod-to-json-schema: ^3.23.0 + checksum: 93ac8a7d9d25c04d4f618e282c136af06cf7712ec3402922531094c3cdab0e59d6f484a7f583022032eb58f914a0494193f2fd22986edd0f6712a29545edf95a + languageName: node + linkType: hard 
+ "@microsoft/fetch-event-source@npm:^2.0.1": version: 2.0.1 resolution: "@microsoft/fetch-event-source@npm:2.0.1"