Skip to content

Commit

Permalink
Merge pull request #1377 from JeevaRamanathan/feature/file-json
Browse files Browse the repository at this point in the history
feat: JSON Parser Implementation
  • Loading branch information
dartpain authored Oct 26, 2024
2 parents bea0cca + 54ad6ad commit 1c791f2
Show file tree
Hide file tree
Showing 10 changed files with 66 additions and 7 deletions.
1 change: 1 addition & 0 deletions application/api/user/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ def post(self):
".epub",
".html",
".mdx",
".json",
".xlsx",
],
job_name,
Expand Down
2 changes: 2 additions & 0 deletions application/parser/file/bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from application.parser.file.markdown_parser import MarkdownParser
from application.parser.file.rst_parser import RstParser
from application.parser.file.tabular_parser import PandasCSVParser,ExcelParser
from application.parser.file.json_parser import JSONParser
from application.parser.schema.base import Document

DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
Expand All @@ -23,6 +24,7 @@
".rst": RstParser(),
".html": HTMLParser(),
".mdx": MarkdownParser(),
".json":JSONParser(),
}


Expand Down
57 changes: 57 additions & 0 deletions application/parser/file/json_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import json
from typing import Any, Dict, List, Union
from pathlib import Path

from application.parser.file.base_parser import BaseParser

class JSONParser(BaseParser):
    r"""JSON (.json) parser.

    Parses a JSON file into either a single concatenated string or a list of
    the decoded top-level items. A top-level JSON array contributes one row
    per element; any other top-level value (object, string, number, boolean,
    null) is treated as a single row.

    Args:
        concat_rows (bool): Whether to concatenate all rows into one string.
            If set to False, the list of decoded rows is returned instead.
            True by default.
        row_joiner (str): Separator to use for joining each row.
            Only used when `concat_rows=True`.
            Set to "\n" by default.
        json_config (dict): Keyword arguments forwarded to ``json.load``
            (for example ``object_hook`` or ``parse_float``).
            Defaults to no extra arguments.
    """

    def __init__(
        self,
        *args: Any,
        concat_rows: bool = True,
        row_joiner: str = "\n",
        json_config: Union[dict, None] = None,
        **kwargs: Any
    ) -> None:
        """Store the parsing options; remaining arguments go to the base class."""
        super().__init__(*args, **kwargs)
        self._concat_rows = concat_rows
        self._row_joiner = row_joiner
        # Take a private copy: a `{}` default would be shared by every
        # instance, and keeping the caller's dict by reference would let later
        # mutations of it silently change this parser's behavior.
        self._json_config = dict(json_config) if json_config else {}

    def _init_parser(self) -> Dict:
        """Init parser. Nothing needs to be prepared for JSON, so return an empty config."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[Any]]:
        """Parse a JSON file.

        Args:
            file (Path): Path of the JSON file to read (decoded as UTF-8).
            errors (str): Decoding error policy handed to ``open``
                (e.g. "ignore", "strict", "replace").

        Returns:
            When `concat_rows` is True, a single string made of ``str(row)``
            for every row, joined with `row_joiner`. Otherwise the list of
            decoded rows (dicts, lists, strings, numbers, ...).

        Raises:
            json.JSONDecodeError: If the file content is not valid JSON.
            OSError: If the file cannot be opened.
        """
        # Honor the caller's decoding policy instead of silently dropping it.
        with open(file, "r", encoding="utf-8", errors=errors) as f:
            data = json.load(f, **self._json_config)

        # Only a top-level array holds several rows. Everything else is one
        # row; without this a top-level string would be split into characters
        # and a top-level number would fail in the join below.
        if not isinstance(data, list):
            data = [data]

        if self._concat_rows:
            return self._row_joiner.join(str(item) for item in data)
        return data
2 changes: 0 additions & 2 deletions frontend/src/index.css
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,8 @@ body.dark {
.table-default td:last-child {
@apply border-r-0; /* Ensure no right border on the last column */
}

}


/*! normalize.css v8.0.1 | MIT License | github.com/necolas/normalize.css */

/* Document
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/locale/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
"start": "Start Chatting",
"name": "Name",
"choose": "Choose Files",
"info": "Please upload .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .zip limited to 25mb",
"info": "Please upload .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .zip limited to 25mb",
"uploadedFiles": "Uploaded Files",
"cancel": "Cancel",
"train": "Train",
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/locale/es.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
"start": "Empezar a chatear",
"name": "Nombre",
"choose": "Seleccionar Archivos",
"info": "Por favor, suba archivos .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .zip limitados a 25 MB",
"info": "Por favor, suba archivos .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .zip limitados a 25 MB",
"uploadedFiles": "Archivos Subidos",
"cancel": "Cancelar",
"train": "Entrenar",
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/locale/jp.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
"start": "チャットを開始する",
"name": "名前",
"choose": "ファイルを選択",
"info": ".pdf, .txt, .rst, .docx, .md, .zipファイルを25MBまでアップロードしてください",
"info": ".pdf, .txt, .rst, .docx, .md, .json, .zipファイルを25MBまでアップロードしてください",
"uploadedFiles": "アップロードされたファイル",
"cancel": "キャンセル",
"train": "トレーニング",
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/locale/zh-TW.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
"remote": "遠端",
"name": "名稱",
"choose": "選擇檔案",
"info": "請上傳 .pdf, .txt, .rst, .docx, .md, .zip 檔案,大小限制為 25MB",
"info": "請上傳 .pdf, .txt, .rst, .docx, .md, .json, .zip 檔案,大小限制為 25MB",
"uploadedFiles": "已上傳的檔案",
"cancel": "取消",
"train": "訓練",
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/locale/zh.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
"start": "开始聊天",
"name": "名称",
"choose": "选择文件",
"info": "请上传 .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .zip 文件,限 25MB",
"info": "请上传 .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .zip 文件,限 25MB",
"uploadedFiles": "已上传文件",
"cancel": "取消",
"train": "训练",
Expand Down
1 change: 1 addition & 0 deletions frontend/src/upload/Upload.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,7 @@ function Upload({
'application/zip': ['.zip'],
'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
['.docx'],
'application/json': ['.json'],
'text/csv': ['.csv'],
'text/html': ['.html'],
'application/epub+zip': ['.epub'],
Expand Down

0 comments on commit 1c791f2

Please sign in to comment.