forked from openai/chatgpt-retrieval-plugin
-
Notifications
You must be signed in to change notification settings - Fork 0
/
file.py
64 lines (45 loc) · 1.69 KB
/
file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
import tempfile
from io import BufferedReader
from typing import Optional

from fastapi import UploadFile

from models.models import Document, DocumentMetadata
async def get_document_from_file(
    file: UploadFile, metadata: DocumentMetadata
) -> Document:
    """Build a Document from an uploaded file and caller-supplied metadata.

    The text is obtained by awaiting ``extract_text_from_form_file`` on the
    upload; ``metadata`` is attached to the resulting Document unchanged.
    """
    text = await extract_text_from_form_file(file)
    return Document(text=text, metadata=metadata)
def extract_text_from_filepath(filepath: str, mimetype: Optional[str] = None) -> str:
    """Open ``filepath`` in binary mode and return its extracted text.

    The open handle and ``mimetype`` are handed to ``extract_text_from_file``.
    Any exception raised while opening or extracting is printed and then
    re-raised to the caller.
    """
    try:
        with open(filepath, "rb") as stream:
            return extract_text_from_file(stream, mimetype)
    except Exception as err:
        print(f"Error: {err}")
        raise err
def extract_text_from_file(
    file: BufferedReader, mimetype: Optional[str] = None
) -> str:
    """Return the entire contents of an open binary file decoded as UTF-8.

    Args:
        file: A readable binary stream; it is read to the end.
        mimetype: Accepted for interface compatibility but not used -- the
            content is always decoded as UTF-8. Optional because callers in
            this module may pass ``None``.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    return file.read().decode("utf-8")
# Extract text from an uploaded form file (the mimetype is forwarded, but the content is always decoded as UTF-8)
async def extract_text_from_form_file(file: UploadFile) -> str:
    """Return the text content of an uploaded file.

    The upload body is written to a temporary file on disk, which is then
    passed (with the upload's content type) to ``extract_text_from_filepath``.
    The temporary file is always removed before returning or raising.

    Args:
        file: The uploaded file; its whole body is read into memory.

    Returns:
        The extracted text.

    Raises:
        Exception: Any error from writing the temporary file or extracting
            the text is printed and re-raised unchanged.
    """
    # get the file body from the upload file object
    mimetype = file.content_type
    print(f"mimetype: {mimetype}")
    print(f"file.file: {file.file}")
    print("file: ", file)

    file_stream = await file.read()

    # Use a unique temp file per call: a single fixed path would let
    # concurrent uploads overwrite each other's data, and one request's
    # cleanup could delete a file another request is still reading.
    fd, temp_file_path = tempfile.mkstemp(prefix="temp_file_")
    try:
        # write the file to the temporary location
        with os.fdopen(fd, "wb") as f:
            f.write(file_stream)
        extracted_text = extract_text_from_filepath(temp_file_path, mimetype)
    except Exception as e:
        print(f"Error: {e}")
        raise
    finally:
        # remove file from temp location on both success and failure
        os.remove(temp_file_path)

    return extracted_text