Skip to content

Commit

Permalink
community[patch]: Support for specifying api url for firecrawl document loader (#6488)
Browse files Browse the repository at this point in the history

* Support for specifying api url for firecrawl document loader

* refactor: remove default apiUrl value

* refactor: change comment

* Fix

---------

Co-authored-by: jacoblee93 <[email protected]>
  • Loading branch information
wahpiangle and jacoblee93 authored Aug 12, 2024
1 parent 4bc1621 commit 3ff65cb
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 4 deletions.
2 changes: 1 addition & 1 deletion libs/langchain-community/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
"@langchain/scripts": "~0.0.20",
"@langchain/standard-tests": "0.0.0",
"@layerup/layerup-security": "^1.5.12",
"@mendable/firecrawl-js": "^0.0.13",
"@mendable/firecrawl-js": "^0.0.36",
"@mlc-ai/web-llm": "0.2.46",
"@mozilla/readability": "^0.4.4",
"@neondatabase/serverless": "^0.9.1",
Expand Down
18 changes: 16 additions & 2 deletions libs/langchain-community/src/document_loaders/web/firecrawl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ interface FirecrawlLoaderParameters {
*/
apiKey?: string;

/**
* API URL for Firecrawl. Optional; it is only forwarded to the
* Firecrawl client when explicitly set.
*/
apiUrl?: string;
/**
* Mode of operation. Can be either "crawl" or "scrape". If not provided, the default value is "crawl".
*/
Expand Down Expand Up @@ -46,6 +50,8 @@ interface FirecrawlDocument {
export class FireCrawlLoader extends BaseDocumentLoader {
private apiKey: string;

private apiUrl?: string;

private url: string;

private mode: "crawl" | "scrape";
Expand All @@ -56,6 +62,7 @@ export class FireCrawlLoader extends BaseDocumentLoader {
super();
const {
apiKey = getEnvironmentVariable("FIRECRAWL_API_KEY"),
apiUrl,
url,
mode = "crawl",
params,
Expand All @@ -67,18 +74,25 @@ export class FireCrawlLoader extends BaseDocumentLoader {
}

this.apiKey = apiKey;
this.apiUrl = apiUrl;
this.url = url;
this.mode = mode;
this.params = params;
}

/**
* Loads the data from the Firecrawl.
* Loads data from Firecrawl.
* @returns An array of Documents representing the retrieved data.
* @throws An error if the data could not be loaded.
*/
public async load(): Promise<DocumentInterface[]> {
const app = new FirecrawlApp({ apiKey: this.apiKey });
const params: ConstructorParameters<typeof FirecrawlApp>[0] = {
apiKey: this.apiKey,
};
if (this.apiUrl !== undefined) {
params.apiUrl = this.apiUrl;
}
const app = new FirecrawlApp(params);
let firecrawlDocs: FirecrawlDocument[];

if (this.mode === "scrape") {
Expand Down
15 changes: 14 additions & 1 deletion yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -11185,7 +11185,7 @@ __metadata:
"@langchain/scripts": ~0.0.20
"@langchain/standard-tests": 0.0.0
"@layerup/layerup-security": ^1.5.12
"@mendable/firecrawl-js": ^0.0.13
"@mendable/firecrawl-js": ^0.0.36
"@mlc-ai/web-llm": 0.2.46
"@mozilla/readability": ^0.4.4
"@neondatabase/serverless": ^0.9.1
Expand Down Expand Up @@ -12704,6 +12704,19 @@ __metadata:
languageName: node
linkType: hard

"@mendable/firecrawl-js@npm:^0.0.36":
version: 0.0.36
resolution: "@mendable/firecrawl-js@npm:0.0.36"
dependencies:
axios: ^1.6.8
dotenv: ^16.4.5
uuid: ^9.0.1
zod: ^3.23.8
zod-to-json-schema: ^3.23.0
checksum: 93ac8a7d9d25c04d4f618e282c136af06cf7712ec3402922531094c3cdab0e59d6f484a7f583022032eb58f914a0494193f2fd22986edd0f6712a29545edf95a
languageName: node
linkType: hard

"@microsoft/fetch-event-source@npm:^2.0.1":
version: 2.0.1
resolution: "@microsoft/fetch-event-source@npm:2.0.1"
Expand Down

0 comments on commit 3ff65cb

Please sign in to comment.