Format, rename, fix docs

langchain-ai · Dec 24, 2024 · 58931bf · 58931bf
1 parent d169dde
commit 58931bf
Show file tree

Hide file tree

Showing 8 changed files with 527 additions and 522 deletions.
diff --git a/docs/core_docs/docs/integrations/retrievers/arxiv-retriever.mdx b/docs/core_docs/docs/integrations/retrievers/arxiv-retriever.mdx
@@ -1,40 +1,41 @@
 # ArxivRetriever
+
 ---
 
 ## Overview
 
 The `arXiv Retriever` allows users to query the arXiv database for academic articles. It supports both full-document retrieval (PDF parsing) and summary-based retrieval. For detailed documentation of all ArxivRetriever features and configurations, head to [API reference](https://arxiv.org/).
 
 ## Features
+
 - Query Flexibility: Search using natural language queries or specific arXiv IDs.
 - Full-Document Retrieval: Option to fetch and parse PDFs.
 - Summaries as Documents: Retrieve summaries for faster results.
 - Customizable Options: Configure maximum results and output format.
 
 ## Integration details
 
-| Retriever        | Source                       | Package                                 |
-| ---------------- | ---------------------------- | --------------------------------------- |
+| Retriever        | Source                       | Package                                                                      |
+| ---------------- | ---------------------------- | ---------------------------------------------------------------------------- |
 | `ArxivRetriever` | Academic articles from arXiv | [`@langchain/community`](https://www.npmjs.com/package/@langchain/community) |
 
 ## Setup/Installation
 
 Ensure the following dependencies are installed:
+
 - `pdf-parse` for parsing PDFs
 - `fast-xml-parser` for parsing XML responses from the arXiv API
 
 ```npm2yarn
 npm install pdf-parse fast-xml-parser
 ```
----
-
 
 ## Instantiate the retriever
 
 ```typescript
 const retriever = new ArxivRetriever({
-  getFullDocuments: false, // Set to true to fetch full documents (PDFs)
-  maxSearchResults: 5,     // Maximum number of results to retrieve
+  returnFullDocuments: false, // Set to true to fetch full documents (PDFs)
+  maxSearchResults: 5, // Maximum number of results to retrieve
 });
 ```
 
@@ -46,7 +47,7 @@ Use the `invoke` method to search arXiv for relevant articles. You can use eithe
 const query = "quantum computing";
 
 const documents = await retriever.invoke(query);
-documents.forEach(doc => {
+documents.forEach((doc) => {
   console.log("Title:", doc.metadata.title);
   console.log("Content:", doc.pageContent); // Parsed PDF content
 });
@@ -59,7 +60,10 @@ Like other retrievers, `ArxivRetriever` can be incorporated into LLM application
 ```typescript
 import { ChatOpenAI } from "@langchain/openai";
 import { ChatPromptTemplate } from "@langchain/core/prompts";
-import { RunnablePassthrough, RunnableSequence } from "@langchain/core/runnables";
+import {
+  RunnablePassthrough,
+  RunnableSequence,
+} from "@langchain/core/runnables";
 import { StringOutputParser } from "@langchain/core/output_parsers";
 import type { Document } from "@langchain/core/documents";
 

diff --git a/examples/src/retrievers/arxiv.ts b/examples/src/retrievers/arxiv.ts
@@ -1,4 +1,4 @@
-import { ArxivRetriever } from "../../../libs/langchain-community/src/retrievers/arxiv.js";
+import { ArxivRetriever } from "@langchain/community/retrievers/arxiv";
 
 export const run = async () => {
   /*
@@ -7,8 +7,8 @@ export const run = async () => {
 
   const queryId = "1605.08386 2103.03404";
   const retrieverById = new ArxivRetriever({
-    getFullDocuments: true,
-    maxSearchResults: 5
+    returnFullDocuments: true,
+    maxSearchResults: 5,
   });
   const documentsById = await retrieverById.invoke(queryId);
   console.log(documentsById);
@@ -41,12 +41,10 @@ export const run = async () => {
   */
 
   const queryNat = "What is the ImageBind model?";
-  const retrieverByNat = new ArxivRetriever(
-    {
-      getFullDocuments: false,
-      maxSearchResults: 2
-    }
-  );
+  const retrieverByNat = new ArxivRetriever({
+    returnFullDocuments: false,
+    maxSearchResults: 2,
+  });
   const documentsByQuery = await retrieverByNat.invoke(queryNat);
   console.log(documentsByQuery);
 
@@ -64,4 +62,4 @@ export const run = async () => {
     }
   ]
   */
-};
+};
diff --git a/libs/langchain-community/langchain.config.js b/libs/langchain-community/langchain.config.js
@@ -438,6 +438,7 @@ export const config = {
     "chat_models/zhipuai",
     "retrievers/amazon_kendra",
     "retrievers/amazon_knowledge_base",
+    "retrievers/arxiv",
     "retrievers/dria",
     "retrievers/metal",
     "retrievers/supabase",

diff --git a/libs/langchain-community/src/load/import_constants.ts b/libs/langchain-community/src/load/import_constants.ts
@@ -100,6 +100,7 @@ export const optionalImportEntrypoints: string[] = [
   "langchain_community/callbacks/handlers/upstash_ratelimit",
   "langchain_community/retrievers/amazon_kendra",
   "langchain_community/retrievers/amazon_knowledge_base",
+  "langchain_community/retrievers/arxiv",
   "langchain_community/retrievers/dria",
   "langchain_community/retrievers/metal",
   "langchain_community/retrievers/supabase",

diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts
@@ -57,7 +57,6 @@ export * as chat_models__novita from "../chat_models/novita.js";
 export * as chat_models__ollama from "../chat_models/ollama.js";
 export * as chat_models__togetherai from "../chat_models/togetherai.js";
 export * as chat_models__yandex from "../chat_models/yandex.js";
-export * as retrievers__arxiv from "../retrievers/arxiv.js";
 export * as retrievers__bm25 from "../retrievers/bm25.js";
 export * as retrievers__chaindesk from "../retrievers/chaindesk.js";
 export * as retrievers__databerry from "../retrievers/databerry.js";

diff --git a/libs/langchain-community/src/retrievers/arxiv.ts b/libs/langchain-community/src/retrievers/arxiv.ts
@@ -1,45 +1,51 @@
 import { BaseRetriever, BaseRetrieverInput } from "@langchain/core/retrievers";
 import { Document } from "@langchain/core/documents";
-import { searchArxiv, loadDocsFromResults, getDocsFromSummaries } from '../utils/arxiv.js';
+import {
+  searchArxiv,
+  loadDocsFromResults,
+  getDocsFromSummaries,
+} from "../utils/arxiv.js";
 
 export type ArxivRetrieverOptions = {
-    getFullDocuments?: boolean;
-    maxSearchResults?: number;
+  returnFullDocuments?: boolean;
+  maxSearchResults?: number;
 } & BaseRetrieverInput;
 
 /**
  * A retriever that searches arXiv for relevant articles based on a query.
  * It can retrieve either full documents (PDFs) or just summaries.
  */
 export class ArxivRetriever extends BaseRetriever {
-    static lc_name() {
-        return "ArxivRetriever";
-    }
+  static lc_name() {
+    return "ArxivRetriever";
+  }
 
-    lc_namespace = ["langchain", "retrievers", "arxiv_retriever"];
+  lc_namespace = ["langchain", "retrievers", "arxiv_retriever"];
 
-    getFullDocuments: boolean;
-    maxSearchResults: number;
+  returnFullDocuments = false;
 
-    constructor(options: ArxivRetrieverOptions = {}) {
-        super(options);
-        this.getFullDocuments = options.getFullDocuments ?? false;
-        this.maxSearchResults = options.maxSearchResults ?? 10;
-    }
+  maxSearchResults = 10;
+
+  constructor(options: ArxivRetrieverOptions = {}) {
+    super(options);
+    this.returnFullDocuments =
+      options.returnFullDocuments ?? this.returnFullDocuments;
+    this.maxSearchResults = options.maxSearchResults ?? this.maxSearchResults;
+  }
+
+  async _getRelevantDocuments(query: string): Promise<Document[]> {
+    try {
+      const results = await searchArxiv(query, this.maxSearchResults);
 
-    async _getRelevantDocuments(query: string): Promise<Document[]> {
-        try {
-            const results = await searchArxiv(query, this.maxSearchResults);
-
-            if (this.getFullDocuments) {
-                // Fetch and parse PDFs to get full documents
-                return await loadDocsFromResults(results);
-            } else {
-                // Use summaries as documents
-                return getDocsFromSummaries(results);
-            }
-        } catch (error) {
-            throw new Error(`Error retrieving documents from arXiv.`);
-        }
+      if (this.returnFullDocuments) {
+        // Fetch and parse PDFs to get full documents
+        return await loadDocsFromResults(results);
+      } else {
+        // Use summaries as documents
+        return getDocsFromSummaries(results);
+      }
+    } catch (error) {
+      throw new Error(`Error retrieving documents from arXiv.`);
     }
+  }
 }