Commit
Showing 7 changed files with 237 additions and 6 deletions.
@@ -0,0 +1,105 @@
import json
import os
import re
import string
import sys
from collections import defaultdict
from pathlib import Path


def extract_titles(input_dir: str) -> list[dict[str, str]]:
    titles = []
    title_pattern = re.compile(r"^# (.+)")  # Pattern to match '# Title' format

    # Traverse all files in the directory
    for filename in os.listdir(input_dir):
        file_path = os.path.join(input_dir, filename)

        if filename.endswith((".md", ".mdx")):
            # Open markdown files and extract title
            with open(file_path, "r", encoding="utf-8") as file:
                for line in file:
                    match = title_pattern.match(line)
                    if match:
                        title = match.group(1)
                        titles.append({"file": filename, "title": title})
                        break  # Stop after the first title line

        elif filename.endswith(".ipynb"):
            # Open Jupyter Notebook files and extract title
            with open(file_path, "r", encoding="utf-8") as file:
                notebook_data = json.load(file)
                # Search in notebook cells for the title
                for cell in notebook_data.get("cells", []):
                    if cell.get("cell_type") == "markdown":
                        for line in cell.get("source", []):
                            match = title_pattern.match(line)
                            if match:
                                title = match.group(1)
                                titles.append({"file": filename, "title": title})
                                break
                    if titles and titles[-1]["file"] == filename:
                        break  # Stop after finding the first title in the notebook

    return titles


def transform_to_links(titles: list[dict[str, str]], prefix: str) -> list[str]:
    return [
        f"[{title['title']}]({prefix}{title['file'].split('.')[0]})" for title in titles
    ]


def generate_index_page(items: list[str], num_columns: int = 5) -> str:
    # Group items by their starting letter (the second character in the string)
    grouped_items = defaultdict(list)
    for item in items:
        first_letter = item[1].upper()
        if first_letter in string.ascii_uppercase:
            grouped_items[first_letter].append(item)
        else:
            grouped_items["0-9"].append(item)  # Non-alphabetical characters go here

    # Sort groups by letters A-Z
    sorted_groups = sorted(grouped_items.items())

    # Generate Markdown content
    content = [
        "# Providers\n\n",
        """
:::info
If you'd like to write your own integration, see [Extending LangChain](/docs/how_to/#custom).
If you'd like to contribute an integration, see [Contributing integrations](/docs/contributing/integrations/).
:::
""",
    ]
    # First part: Menu with links
    menu_links = " | ".join(f"[{letter}](#{letter})" for letter, _ in sorted_groups)
    content.append(menu_links + "\n\n")
    content.append("\n---\n\n")

    # Second part: Grouped items in a single line with separators
    for letter, items in sorted_groups:
        content.append(f"### {letter}\n\n")
        # Sort items within each group and join them in a single line with " | " separator
        items_line = " | ".join(sorted(items, key=str.casefold))
        content.append(items_line + "\n\n")

    return "".join(content)


if __name__ == "__main__":
    DOCS_DIR = Path(__file__).parents[1]
    providers_dir = DOCS_DIR / "docs" / "integrations" / "providers"
    # "all.mdx" is used for `providers` root directory menu
    output_file = providers_dir / "all.mdx"

    titles = extract_titles(providers_dir)
    links = transform_to_links(titles=titles, prefix="/docs/integrations/providers/")
    mdx_page = generate_index_page(items=links)
    with open(output_file, "w") as f:
        f.write(mdx_page)
    print(f"{output_file} generated successfully")