diff --git a/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py b/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py index 5ee99a88d..083c260b7 100644 --- a/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py +++ b/packages/jupyter-ai/jupyter_ai/document_loaders/directory.py @@ -1,13 +1,12 @@ +import datetime import hashlib import itertools import os -import datetime import tarfile from pathlib import Path from typing import List import dask -from langchain_community.document_loaders import PyPDFLoader from langchain.schema import Document from langchain.text_splitter import TextSplitter from langchain_community.document_loaders import PyPDFLoader @@ -16,6 +15,7 @@ # Download a single tar file from arXiv and store in a temp folder for RAG, then run learn on it. def arxiv_to_text(id): # id is numbers after "arXiv" in arXiv:xxxx.xxxxx import arxiv + # Get the paper from arxiv outfile = id + datetime.datetime.now().strftime("_%Y-%m-%d-%H-%M") + ".tex" paper = next(arxiv.Client().results(arxiv.Search(id_list=[id])))