From 21cd03aae08baf6beb4012162a80e1fe8f879881 Mon Sep 17 00:00:00 2001 From: Shreya Shankar Date: Sat, 5 Oct 2024 15:27:14 -0700 Subject: [PATCH 01/29] feat: add server for ui --- poetry.lock | 72 +++++++++++++++++++++++++ pyproject.toml | 3 ++ server/__init__.py | 0 server/app/__init__.py | 0 server/app/main.py | 27 ++++++++++ server/app/models.py | 5 ++ server/app/routes/__init__.py | 0 server/app/routes/pipeline.py | 14 +++++ server/app/services/__init__.py | 0 server/app/services/pipeline_service.py | 16 ++++++ 10 files changed, 137 insertions(+) create mode 100644 server/__init__.py create mode 100644 server/app/__init__.py create mode 100644 server/app/main.py create mode 100644 server/app/models.py create mode 100644 server/app/routes/__init__.py create mode 100644 server/app/routes/pipeline.py create mode 100644 server/app/services/__init__.py create mode 100644 server/app/services/pipeline_service.py diff --git a/poetry.lock b/poetry.lock index 9694fbd8..cdd18212 100644 --- a/poetry.lock +++ b/poetry.lock @@ -778,6 +778,26 @@ files = [ [package.extras] testing = ["hatch", "pre-commit", "pytest", "tox"] +[[package]] +name = "fastapi" +version = "0.115.0" +description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +optional = true +python-versions = ">=3.8" +files = [ + {file = "fastapi-0.115.0-py3-none-any.whl", hash = "sha256:17ea427674467486e997206a5ab25760f6b09e069f099b96f5b55a32fb6f1631"}, + {file = "fastapi-0.115.0.tar.gz", hash = "sha256:f93b4ca3529a8ebc6fc3fcf710e5efa8de3df9b41570958abf1d97d843138004"}, +] + +[package.dependencies] +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" +starlette = ">=0.37.2,<0.39.0" +typing-extensions = ">=4.8.0" + +[package.extras] +all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "jinja2 (>=2.11.2)", "python-multipart (>=0.0.7)", "uvicorn[standard] (>=0.12.0)"] + [[package]] name = "filelock" version = "3.16.1" @@ -4091,6 +4111,38 @@ files = [ {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, ] +[[package]] +name = "starlette" +version = "0.38.6" +description = "The little ASGI library that shines." +optional = true +python-versions = ">=3.8" +files = [ + {file = "starlette-0.38.6-py3-none-any.whl", hash = "sha256:4517a1409e2e73ee4951214ba012052b9e16f60e90d73cfb06192c19203bbb05"}, + {file = "starlette-0.38.6.tar.gz", hash = "sha256:863a1588f5574e70a821dadefb41e4881ea451a47a3cd1b4df359d4ffefe5ead"}, +] + +[package.dependencies] +anyio = ">=3.4.0,<5" + +[package.extras] +full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] + +[[package]] +name = "tenacity" +version = "9.0.0" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539"}, + {file = "tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b"}, +] + +[package.extras] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] + [[package]] name = "termcolor" version = "2.5.0" @@ -4378,6 +4430,25 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "uvicorn" +version = "0.31.0" +description = "The lightning-fast ASGI server." +optional = false +python-versions = ">=3.8" +files = [ + {file = "uvicorn-0.31.0-py3-none-any.whl", hash = "sha256:cac7be4dd4d891c363cd942160a7b02e69150dcbc7a36be04d5f4af4b17c8ced"}, + {file = "uvicorn-0.31.0.tar.gz", hash = "sha256:13bc21373d103859f68fe739608e2eb054a816dea79189bc3ca08ea89a275906"}, +] + +[package.dependencies] +click = ">=7.0" +h11 = ">=0.8" +typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} + +[package.extras] +standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] + [[package]] name = "virtualenv" version = "20.27.0" @@ -4568,6 +4639,7 @@ type = ["pytest-mypy"] [extras] parsing = ["azure-ai-documentintelligence", "openpyxl", "paddleocr", "paddlepaddle", "pydub", "pymupdf", "python-docx", "python-pptx"] +server = ["fastapi"] [metadata] lock-version = "2.0" diff --git a/pyproject.toml b/pyproject.toml index 3eb67cad..207d74c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,9 +35,12 @@ paddleocr = { version = "^2.8.1", optional = true } pymupdf = { version = "^1.24.10", optional = true } jsonschema = "^4.23.0" rapidfuzz = "^3.10.0" +fastapi = { version = "^0.115.0", optional = true } +uvicorn = { version = "^0.31.0", optional = true } [tool.poetry.extras] parsing = ["python-docx", "openpyxl", "pydub", "python-pptx", "azure-ai-documentintelligence", "paddlepaddle", "paddleocr", "pymupdf"] +server = ["fastapi", "uvicorn"] [tool.poetry.group.dev.dependencies] pytest = "^8.3.2" diff --git a/server/__init__.py b/server/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/server/app/__init__.py b/server/app/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/server/app/main.py b/server/app/main.py new file mode 100644 index 00000000..ad2c0837 --- /dev/null +++ b/server/app/main.py @@ -0,0 +1,27 @@ +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from server.app.routes import pipeline + +app = FastAPI() + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["http://localhost:3000"], # Adjust this to your Next.js app's URL + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +app.include_router(pipeline.router) + + +@app.get("/") +async def root(): + return {"message": "DocETL API is running"} + + +if __name__ == "__main__": + import uvicorn + + uvicorn.run("server.app.main:app", host="0.0.0.0", port=8000, reload=True) diff --git a/server/app/models.py b/server/app/models.py new file mode 100644 index 00000000..c21db8c8 --- /dev/null +++ b/server/app/models.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel + + +class PipelineRequest(BaseModel): + yaml_config: str diff --git a/server/app/routes/__init__.py b/server/app/routes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/server/app/routes/pipeline.py b/server/app/routes/pipeline.py new file mode 100644 index 00000000..cdf26656 --- /dev/null +++ b/server/app/routes/pipeline.py @@ -0,0 +1,14 @@ +from fastapi import APIRouter, HTTPException, Depends +from server.app.models import PipelineRequest +from server.app.services.pipeline_service import run_pipeline_service + +router = APIRouter() + + +@router.post("/run_pipeline") +async def run_pipeline(request: PipelineRequest): + try: + result = run_pipeline_service(request.yaml_config) + return result + except Exception as e: + raise HTTPException(status_code=400, detail=str(e)) diff --git a/server/app/services/__init__.py b/server/app/services/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/server/app/services/pipeline_service.py b/server/app/services/pipeline_service.py new file mode 100644 index 00000000..fda76bf5 --- /dev/null +++ b/server/app/services/pipeline_service.py @@ -0,0 +1,16 @@ +import yaml +from docetl.api import Pipeline + + +def run_pipeline_service(yaml_config: str): + config = yaml.safe_load(yaml_config) + pipeline = Pipeline.from_dict(config) + cost = pipeline.run() + + output_path = config["pipeline"]["output"]["path"] + + return { + "message": "Pipeline executed successfully", + "cost": cost, + "output_file": output_path, + } From 2e5a8dae0214ca96877868bcbbd321189907ef04 Mon Sep 17 00:00:00 2001 From: Shreya Shankar Date: Sun, 6 Oct 2024 21:16:11 -0700 Subject: [PATCH 02/29] feat: improving UI --- docetl/operations/gather.py | 9 +- docetl/operations/map.py | 5 +- docetl/runner.py | 2 + server/app/routes/pipeline.py | 2 +- server/app/services/pipeline_service.py | 25 +- website/package-lock.json | 319 ++++++- website/package.json | 6 + website/src/app/api/readFile/route.ts | 18 + website/src/app/api/runPipeline/route.ts | 154 ++++ website/src/app/playground/page.tsx | 46 +- website/src/app/types.ts | 13 +- website/src/components/BookmarksPanel.tsx | 2 +- website/src/components/DatasetView.tsx | 45 +- website/src/components/FileExplorer.tsx | 74 +- website/src/components/OperationCard.tsx | 631 +++++++------- website/src/components/Output.tsx | 167 +++- website/src/components/PipelineGui.tsx | 261 +++++- website/src/components/ResizableDataTable.tsx | 66 +- website/src/components/operations/args.tsx | 232 +++++ .../src/components/operations/components.tsx | 803 ++++++++++++++++++ website/src/components/ui/badge.tsx | 36 + website/src/components/ui/context-menu.tsx | 204 +++++ website/src/components/ui/skeleton.tsx | 15 + website/src/contexts/PipelineContext.tsx | 46 +- website/src/hooks/useFileExplorer.ts | 17 +- website/src/mocks/mockData.ts | 34 +- 26 files changed, 2724 insertions(+), 508 deletions(-) create mode 100644 website/src/app/api/readFile/route.ts create mode 100644 website/src/app/api/runPipeline/route.ts create mode 100644 website/src/components/operations/args.tsx create mode 100644 website/src/components/operations/components.tsx create mode 100644 website/src/components/ui/badge.tsx create mode 100644 website/src/components/ui/context-menu.tsx create mode 100644 website/src/components/ui/skeleton.tsx diff --git a/docetl/operations/gather.py b/docetl/operations/gather.py index 6d97020c..5d6cfec1 100644 --- a/docetl/operations/gather.py +++ b/docetl/operations/gather.py @@ -48,12 +48,7 @@ def syntax_check(self) -> None: f"Missing required key '{key}' in GatherOperation configuration" ) - if "peripheral_chunks" not in self.config: - raise ValueError( - "Missing 'peripheral_chunks' configuration in GatherOperation" - ) - - peripheral_config = self.config["peripheral_chunks"] + peripheral_config = self.config.get("peripheral_chunks", {}) for direction in ["previous", "next"]: if direction not in peripheral_config: continue @@ -87,7 +82,7 @@ def execute(self, input_data: List[Dict]) -> Tuple[List[Dict], float]: content_key = self.config["content_key"] doc_id_key = self.config["doc_id_key"] order_key = self.config["order_key"] - peripheral_config = self.config["peripheral_chunks"] + peripheral_config = self.config.get("peripheral_chunks", {}) main_chunk_start = self.config.get( "main_chunk_start", "--- Begin Main Chunk ---" ) diff --git a/docetl/operations/map.py b/docetl/operations/map.py index 60e8b9ec..1502dafc 100644 --- a/docetl/operations/map.py +++ b/docetl/operations/map.py @@ -379,11 +379,14 @@ def process_prompt(item, prompt_config): local_output_schema = { key: output_schema[key] for key in prompt_config["output_keys"] } + model = prompt_config.get("model", self.default_model) + if not model: + model = self.default_model # Start of Selection # If there are tools, we need to pass in the tools response = self.runner.api.call_llm( - prompt_config.get("model", self.default_model), + model, "parallel_map", [{"role": "user", "content": prompt}], local_output_schema, diff --git a/docetl/runner.py b/docetl/runner.py index 7666e905..776cf435 100644 --- a/docetl/runner.py +++ b/docetl/runner.py @@ -285,6 +285,8 @@ def save(self, data: List[Dict]): self.console.rule("[cyan]Saving Output[/cyan]") output_config = self.config["pipeline"]["output"] if output_config["type"] == "file": + # Create the directory if it doesn't exist + os.makedirs(os.path.dirname(output_config["path"]), exist_ok=True) if output_config["path"].lower().endswith(".json"): with open(output_config["path"], "w") as file: json.dump(data, file, indent=2) diff --git a/server/app/routes/pipeline.py b/server/app/routes/pipeline.py index cdf26656..90442cef 100644 --- a/server/app/routes/pipeline.py +++ b/server/app/routes/pipeline.py @@ -11,4 +11,4 @@ async def run_pipeline(request: PipelineRequest): result = run_pipeline_service(request.yaml_config) return result except Exception as e: - raise HTTPException(status_code=400, detail=str(e)) + raise HTTPException(status_code=500, detail=str(e)) diff --git a/server/app/services/pipeline_service.py b/server/app/services/pipeline_service.py index fda76bf5..7adee1d9 100644 --- a/server/app/services/pipeline_service.py +++ b/server/app/services/pipeline_service.py @@ -1,16 +1,15 @@ -import yaml -from docetl.api import Pipeline +from docetl.runner import DSLRunner def run_pipeline_service(yaml_config: str): - config = yaml.safe_load(yaml_config) - pipeline = Pipeline.from_dict(config) - cost = pipeline.run() - - output_path = config["pipeline"]["output"]["path"] - - return { - "message": "Pipeline executed successfully", - "cost": cost, - "output_file": output_path, - } + try: + runner = DSLRunner.from_yaml(yaml_config) + cost = runner.run() + + return { + "message": "Pipeline executed successfully", + "cost": cost, + } + except Exception as e: + print(f"Error occurred: {str(e)}") + raise e diff --git a/website/package-lock.json b/website/package-lock.json index be5ff695..ce272dcb 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -13,6 +13,7 @@ "@radix-ui/react-accordion": "^1.2.0", "@radix-ui/react-checkbox": "^1.1.2", "@radix-ui/react-collapsible": "^1.1.0", + "@radix-ui/react-context-menu": "^2.2.2", "@radix-ui/react-dialog": "^1.1.2", "@radix-ui/react-dropdown-menu": "^2.1.1", "@radix-ui/react-icons": "^1.3.0", @@ -26,11 +27,15 @@ "@radix-ui/react-toast": "^1.2.1", "@radix-ui/react-tooltip": "^1.1.2", "@tanstack/react-table": "^8.20.5", + "axios": "^1.7.7", "class-variance-authority": "^0.7.0", "clsx": "^2.1.1", "css-loader": "^7.1.2", "framer-motion": "^11.5.4", "gray-matter": "^4.0.3", + "js-yaml": "^4.1.0", + "json2csv": "^6.0.0-alpha.2", + "lodash": "^4.17.21", "lucide-react": "^0.441.0", "next": "14.2.11", "re-resizable": "^6.10.0", @@ -48,6 +53,7 @@ "zod": "^3.23.8" }, "devDependencies": { + "@types/lodash": "^4.17.10", "@types/node": "^20", "@types/react": "^18", "@types/react-beautiful-dnd": "^13.1.8", @@ -729,6 +735,196 @@ } } }, + "node_modules/@radix-ui/react-context-menu": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context-menu/-/react-context-menu-2.2.2.tgz", + "integrity": "sha512-99EatSTpW+hRYHt7m8wdDlLtkmTovEe8Z/hnxUPV+SKuuNL5HWNhQI4QSdjZqNSgXHay2z4M3Dym73j9p2Gx5Q==", + "dependencies": { + "@radix-ui/primitive": "1.1.0", + "@radix-ui/react-context": "1.1.1", + "@radix-ui/react-menu": "2.1.2", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-use-callback-ref": "1.1.0", + "@radix-ui/react-use-controllable-state": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-context-menu/node_modules/@radix-ui/react-context": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.1.tgz", + "integrity": "sha512-UASk9zi+crv9WteK/NU4PLvOoL3OuE6BWVKNF6hPRBtYBDXQ2u5iu3O59zUlJiTVvkyuycnqrztsHVJwcK9K+Q==", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-context-menu/node_modules/@radix-ui/react-dismissable-layer": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.1.tgz", + "integrity": "sha512-QSxg29lfr/xcev6kSz7MAlmDnzbP1eI/Dwn3Tp1ip0KT5CUELsxkekFEMVBEoykI3oV39hKT4TKZzBNMbcTZYQ==", + "dependencies": { + "@radix-ui/primitive": "1.1.0", + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-use-callback-ref": "1.1.0", + "@radix-ui/react-use-escape-keydown": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-context-menu/node_modules/@radix-ui/react-focus-guards": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.1.tgz", + "integrity": "sha512-pSIwfrT1a6sIoDASCSpFwOasEwKTZWDw/iBdtnqKO7v6FeOzYJ7U53cPzYFVR3geGGXgVHaH+CdngrrAzqUGxg==", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-context-menu/node_modules/@radix-ui/react-menu": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.2.tgz", + "integrity": "sha512-lZ0R4qR2Al6fZ4yCCZzu/ReTFrylHFxIqy7OezIpWF4bL0o9biKo0pFIvkaew3TyZ9Fy5gYVrR5zCGZBVbO1zg==", + "dependencies": { + "@radix-ui/primitive": "1.1.0", + "@radix-ui/react-collection": "1.1.0", + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-context": "1.1.1", + "@radix-ui/react-direction": "1.1.0", + "@radix-ui/react-dismissable-layer": "1.1.1", + "@radix-ui/react-focus-guards": "1.1.1", + "@radix-ui/react-focus-scope": "1.1.0", + "@radix-ui/react-id": "1.1.0", + "@radix-ui/react-popper": "1.2.0", + "@radix-ui/react-portal": "1.1.2", + "@radix-ui/react-presence": "1.1.1", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-roving-focus": "1.1.0", + "@radix-ui/react-slot": "1.1.0", + "@radix-ui/react-use-callback-ref": "1.1.0", + "aria-hidden": "^1.1.1", + "react-remove-scroll": "2.6.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-context-menu/node_modules/@radix-ui/react-portal": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.2.tgz", + "integrity": "sha512-WeDYLGPxJb/5EGBoedyJbT0MpoULmwnIPMJMSldkuiMsBAv7N1cRdsTWZWht9vpPOiN3qyiGAtbK2is47/uMFg==", + "dependencies": { + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-use-layout-effect": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-context-menu/node_modules/@radix-ui/react-presence": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.1.tgz", + "integrity": "sha512-IeFXVi4YS1K0wVZzXNrbaaUvIJ3qdY+/Ih4eHFhWA9SwGR9UDX7Ck8abvL57C4cv3wwMvUE0OG69Qc3NCcTe/A==", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-use-layout-effect": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-context-menu/node_modules/react-remove-scroll": { + "version": "2.6.0", + "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.6.0.tgz", + "integrity": "sha512-I2U4JVEsQenxDAKaVa3VZ/JeJZe0/2DxPWL8Tj8yLKctQJQiZM52pn/GWFpSp8dftjM3pSAHVJZscAnC/y+ySQ==", + "dependencies": { + "react-remove-scroll-bar": "^2.3.6", + "react-style-singleton": "^2.2.1", + "tslib": "^2.1.0", + "use-callback-ref": "^1.3.0", + "use-sidecar": "^1.1.2" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-dialog": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.1.2.tgz", @@ -1730,6 +1926,11 @@ "integrity": "sha512-WJgX9nzTqknM393q1QJDJmoW28kUfEnybeTfVNcNAPnIx210RXm2DiXiHzfNPJNIUUb1tJnz/l4QGtJ30PgWmA==", "dev": true }, + "node_modules/@streamparser/json": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/@streamparser/json/-/json-0.0.6.tgz", + "integrity": "sha512-vL9EVn/v+OhZ+Wcs6O4iKE9EUpwHUqHmCtNUMWjqp+6dr85+XPOSGTEsqYNq1Vn04uk9SWlOVmx9J48ggJVT2Q==" + }, "node_modules/@swc/counter": { "version": "0.1.3", "resolved": "https://registry.npmjs.org/@swc/counter/-/counter-0.1.3.tgz", @@ -1824,6 +2025,12 @@ "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", "dev": true }, + "node_modules/@types/lodash": { + "version": "4.17.10", + "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.17.10.tgz", + "integrity": "sha512-YpS0zzoduEhuOWjAotS6A5AVCva7X4lVlYLF0FYHAY9sdraBfnatttHItlWeZdGhuEkf+OzMNg2ZYAx8t+52uQ==", + "dev": true + }, "node_modules/@types/mdast": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz", @@ -2376,8 +2583,7 @@ "node_modules/argparse": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" }, "node_modules/aria-hidden": { "version": "1.2.4", @@ -2564,6 +2770,11 @@ "integrity": "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ==", "dev": true }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, "node_modules/available-typed-arrays": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz", @@ -2588,6 +2799,16 @@ "node": ">=4" } }, + "node_modules/axios": { + "version": "1.7.7", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.7.7.tgz", + "integrity": "sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==", + "dependencies": { + "follow-redirects": "^1.15.6", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } + }, "node_modules/axobject-query": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/axobject-query/-/axobject-query-4.1.0.tgz", @@ -2899,6 +3120,17 @@ "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/comma-separated-tokens": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz", @@ -3150,6 +3382,14 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/dequal": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", @@ -4019,6 +4259,25 @@ "integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw==", "dev": true }, + "node_modules/follow-redirects": { + "version": "1.15.9", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.9.tgz", + "integrity": "sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, "node_modules/for-each": { "version": "0.3.3", "resolved": "https://registry.npmjs.org/for-each/-/for-each-0.3.3.tgz", @@ -4043,6 +4302,19 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/form-data": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", + "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/framer-motion": { "version": "11.5.4", "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-11.5.4.tgz", @@ -5103,7 +5375,6 @@ "version": "4.1.0", "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", - "dev": true, "dependencies": { "argparse": "^2.0.1" }, @@ -5134,6 +5405,31 @@ "integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==", "dev": true }, + "node_modules/json2csv": { + "version": "6.0.0-alpha.2", + "resolved": "https://registry.npmjs.org/json2csv/-/json2csv-6.0.0-alpha.2.tgz", + "integrity": "sha512-nJ3oP6QxN8z69IT1HmrJdfVxhU1kLTBVgMfRnNZc37YEY+jZ4nU27rBGxT4vaqM/KUCavLRhntmTuBFqZLBUcA==", + "dependencies": { + "@streamparser/json": "^0.0.6", + "commander": "^6.2.0", + "lodash.get": "^4.4.2" + }, + "bin": { + "json2csv": "bin/json2csv.js" + }, + "engines": { + "node": ">= 12", + "npm": ">= 6.13.0" + } + }, + "node_modules/json2csv/node_modules/commander": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-6.2.1.tgz", + "integrity": "sha512-U7VdrJFnJgo4xjrHpTzu0yrHPGImdsmD95ZlgYSEajAn2JKzDhDTPG9kBTefmObL2w/ngeZnilk+OV9CG3d7UA==", + "engines": { + "node": ">= 6" + } + }, "node_modules/json5": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.2.tgz", @@ -5246,6 +5542,16 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + }, + "node_modules/lodash.get": { + "version": "4.4.2", + "resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz", + "integrity": "sha512-z+Uw/vLuy6gQe8cfaFWD7p0wVv8fJl3mbzXh33RS+0oW2wvUqiRXiQ69gLWSLpgB5/6sU+r6BlQR0MBILadqTQ==" + }, "node_modules/lodash.merge": { "version": "4.6.2", "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", @@ -5885,7 +6191,6 @@ "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", - "peer": true, "engines": { "node": ">= 0.6" } @@ -5894,7 +6199,6 @@ "version": "2.1.35", "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", - "peer": true, "dependencies": { "mime-db": "1.52.0" }, @@ -6617,6 +6921,11 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, "node_modules/punycode": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", diff --git a/website/package.json b/website/package.json index 880a1fd1..0d896149 100644 --- a/website/package.json +++ b/website/package.json @@ -14,6 +14,7 @@ "@radix-ui/react-accordion": "^1.2.0", "@radix-ui/react-checkbox": "^1.1.2", "@radix-ui/react-collapsible": "^1.1.0", + "@radix-ui/react-context-menu": "^2.2.2", "@radix-ui/react-dialog": "^1.1.2", "@radix-ui/react-dropdown-menu": "^2.1.1", "@radix-ui/react-icons": "^1.3.0", @@ -27,11 +28,15 @@ "@radix-ui/react-toast": "^1.2.1", "@radix-ui/react-tooltip": "^1.1.2", "@tanstack/react-table": "^8.20.5", + "axios": "^1.7.7", "class-variance-authority": "^0.7.0", "clsx": "^2.1.1", "css-loader": "^7.1.2", "framer-motion": "^11.5.4", "gray-matter": "^4.0.3", + "js-yaml": "^4.1.0", + "json2csv": "^6.0.0-alpha.2", + "lodash": "^4.17.21", "lucide-react": "^0.441.0", "next": "14.2.11", "re-resizable": "^6.10.0", @@ -49,6 +54,7 @@ "zod": "^3.23.8" }, "devDependencies": { + "@types/lodash": "^4.17.10", "@types/node": "^20", "@types/react": "^18", "@types/react-beautiful-dnd": "^13.1.8", diff --git a/website/src/app/api/readFile/route.ts b/website/src/app/api/readFile/route.ts new file mode 100644 index 00000000..7d484fe4 --- /dev/null +++ b/website/src/app/api/readFile/route.ts @@ -0,0 +1,18 @@ +import { NextRequest, NextResponse } from 'next/server'; +import fs from 'fs/promises'; + +export async function GET(req: NextRequest) { + const filePath = req.nextUrl.searchParams.get('path'); + + if (!filePath) { + return NextResponse.json({ error: 'Invalid file path' }, { status: 400 }); + } + + try { + const fileContent = await fs.readFile(filePath, 'utf-8'); + return new NextResponse(fileContent, { status: 200 }); + } catch (error) { + console.error('Error reading file:', error); + return NextResponse.json({ error: 'Failed to read file' }, { status: 500 }); + } +} \ No newline at end of file diff --git a/website/src/app/api/runPipeline/route.ts b/website/src/app/api/runPipeline/route.ts new file mode 100644 index 00000000..d61a76ae --- /dev/null +++ b/website/src/app/api/runPipeline/route.ts @@ -0,0 +1,154 @@ +import { NextResponse } from 'next/server'; +import yaml from 'js-yaml'; +import fs from 'fs/promises'; +import path from 'path'; +import axios from 'axios'; +import os from 'os'; +import { Operation, SchemaItem } from '@/app/types'; + +export async function POST(request: Request) { + try { + const { default_model, data, operations, operation_id, name, sample_size } = await request.json(); + + + if (!name) { + return NextResponse.json({ error: 'Pipeline name is required' }, { status: 400 }); + } + + if (!data) { + return NextResponse.json({ error: 'Data is required. Please select a file in the sidebar.' }, { status: 400 }); + } + + // Create pipeline configuration based on tutorial.yaml example + const homeDir = os.homedir(); + + const datasets = { + input: { + type: 'file', + path: data.path, + source: 'local' + } + }; + + // Augment the first operation with sample if sampleSize is not null + if (operations.length > 0 && sample_size !== null) { + operations[0] = { + ...operations[0], + sample: sample_size + }; + } + + // Fix the output schema of all operations to ensure correct typing + const updatedOperations: Record = operations.map((op: Operation) => { + + // Let new op be a dictionary representation of the operation + let newOp: Record = { + ...op, + ...op.otherKwargs + } + + + if (!op.output || !op.output.schema) return newOp; + + const processSchemaItem = (item: SchemaItem): string => { + if (item.type === 'list') { + if (!item.subType) { + throw new Error(`List type must specify its elements for field: ${item.key}`); + } + const subType = typeof item.subType === 'string' ? item.subType : processSchemaItem(item.subType as SchemaItem); + return `list[${subType}]`; + } else if (item.type === 'dict' ) { + if (!item.subType) { + throw new Error(`Dict/Object type must specify its structure for field: ${item.key}`); + } + const subSchema = Object.entries(item.subType).reduce((acc, [key, value]) => { + acc[key] = processSchemaItem(value as SchemaItem); + return acc; + }, {} as Record); + return JSON.stringify(subSchema); + } else { + return item.type; + } + }; + + return { + ...newOp, + output: { + schema: op.output.schema.reduce((acc: Record, item: SchemaItem) => { + acc[item.key] = processSchemaItem(item); + return acc; + }, {}) + } + }; + }); + + + // Fetch all operations up until and including the operation_id + const operationsToRun = operations.slice(0, operations.findIndex((op: Operation) => op.id === operation_id) + 1); + + const pipelineConfig = { + datasets, + default_model, + operations: updatedOperations, + pipeline: { + steps: [ + { + name: 'data_processing', + input: Object.keys(datasets)[0], // Assuming the first dataset is the input + operations: operationsToRun.map((op: any) => op.name) + } + ], + output: { + type: 'file', + path: path.join(homeDir, '.docetl', 'pipelines', 'outputs', `${name}.json`), + intermediate_dir: path.join(homeDir, '.docetl', 'pipelines', name,'intermediates') + } + } + }; + + // Get the inputPath from the intermediate_dir + let inputPath; + const prevOpIndex = operationsToRun.length - 2; + + if (prevOpIndex >= 0) { + const inputBase = pipelineConfig.pipeline.output.intermediate_dir; + const opName = operationsToRun[prevOpIndex].name; + inputPath = path.join(inputBase, "data_processing", opName + '.json'); + } else { + // If there are no previous operations, use the dataset path + inputPath = data.path; + } + const yamlString = yaml.dump(pipelineConfig); + + console.log(yamlString); + + // Save the YAML file in the user's home directory + const pipelineDir = path.join(homeDir, '.docetl', 'pipelines', 'configs'); + await fs.mkdir(pipelineDir, { recursive: true }); + const filePath = path.join(pipelineDir, `${name}.yaml`); + await fs.writeFile(filePath, yamlString, 'utf8'); + + // Submit the YAML config to the FastAPI endpoint + const response = await axios.post('http://localhost:8000/run_pipeline', { + yaml_config: filePath + }); + + return NextResponse.json({ + message: 'Pipeline YAML created and submitted successfully', + filePath, + apiResponse: response.data, + outputPath: pipelineConfig.pipeline.output.path, + inputPath: inputPath + }); + } catch (error) { + let errorMessage; + if (error instanceof axios.AxiosError && error.response && error.response.data) { + errorMessage = error.response.data.detail || String(error); + } else if (error instanceof Error) { + errorMessage = error.message; + } else { + errorMessage = String(error); + } + return NextResponse.json({ error: `Failed to create or submit pipeline YAML: ${errorMessage}` }, { status: 500 }); + } +} diff --git a/website/src/app/playground/page.tsx b/website/src/app/playground/page.tsx index b280365a..4d67ecf8 100644 --- a/website/src/app/playground/page.tsx +++ b/website/src/app/playground/page.tsx @@ -1,7 +1,7 @@ 'use client' import React, { useState } from 'react'; -import { FileText, Maximize2, Minimize2, Plus, Play, GripVertical, Trash2, ChevronDown, Zap, Upload } from 'lucide-react'; +import { FileText, Maximize2, Minimize2, Plus, Play, GripVertical, Trash2, ChevronDown, Zap, Upload, Scroll } from 'lucide-react'; import { Button } from '@/components/ui/button'; import { ResizableHandle, ResizablePanel, ResizablePanelGroup } from "@/components/ui/resizable"; import { DropResult } from 'react-beautiful-dnd'; @@ -81,14 +81,14 @@ const CodeEditorPipelineApp: React.FC = () => { const [showOutput, setShowOutput] = useState(true); const [showDatasetView, setShowDatasetView] = useState(false); - const { operations, currentFile, setOperations, setCurrentFile } = usePipelineContext(); - const { files, handleFileClick, handleFileUpload, handleFilesUpdate } = useFileExplorer(); + const { operations, currentFile, setOperations, setCurrentFile, cost } = usePipelineContext(); + const { files, handleFileClick, handleFileUpload, handleFileDelete } = useFileExplorer(); const handleAddOperation = (llmType: string, type: string, name: string) => { const newOperation: Operation = { id: String(Date.now()), llmType: llmType as 'LLM' | 'non-LLM', - type: type as 'map' | 'reduce' | 'filter' | 'equijoin' | 'resolve' | 'parallel-map' | 'unnest' | 'split' | 'gather', + type: type as 'map' | 'reduce' | 'filter' | 'resolve' | 'parallel_map' | 'unnest' | 'split' | 'gather', name: name, }; setOperations([...operations, newOperation]); @@ -104,25 +104,18 @@ const CodeEditorPipelineApp: React.FC = () => { setOperations(items); }; - const handleRunAll = () => { - console.log("Running all operations"); - // Implement the actual run all logic here - }; - - const handleDeleteOperation = (id: string) => { - setOperations(operations.filter(op => op.id !== id)); - }; - - const handleUpdateOperation = (id: string, updatedOperation: Operation) => { - setOperations(operations.map(op => op.id === id ? updatedOperation : op)); - }; - return ( - + {/* */}
-
-
+
+
+
+ +

DocETL

+
+
+ Cost: ${cost.toFixed(2)} @@ -184,10 +177,12 @@ const CodeEditorPipelineApp: React.FC = () => { onFileClick={(file) => { handleFileClick(file); setCurrentFile(file); - setShowDatasetView(true); }} onFileUpload={handleFileUpload} - onFilesUpdate={handleFilesUpdate} + onFileDelete={handleFileDelete} + setCurrentFile={setCurrentFile} + setShowDatasetView={setShowDatasetView} + currentFile={currentFile} /> @@ -204,12 +199,7 @@ const CodeEditorPipelineApp: React.FC = () => { {showOutput && } @@ -230,7 +220,7 @@ const CodeEditorPipelineApp: React.FC = () => { )}
- + {/* */} ); }; diff --git a/website/src/app/types.ts b/website/src/app/types.ts index 356b9c11..7c628ffe 100644 --- a/website/src/app/types.ts +++ b/website/src/app/types.ts @@ -1,17 +1,18 @@ export type File = { name: string; - content: string; + path: string; }; export type Operation = { id: string; llmType: 'LLM' | 'non-LLM'; - type: 'map' | 'reduce' | 'filter' | 'equijoin' | 'resolve' | 'parallel-map' | 'unnest' | 'split' | 'gather'; + type: 'map' | 'reduce' | 'filter' | 'resolve' | 'parallel_map' | 'unnest' | 'split' | 'gather'; name: string; prompt?: string; - outputSchema?: Record; + output?: {schema: SchemaItem[]}; validate?: string[]; otherKwargs?: Record; + runIndex?: number; }; export type OutputRow = Record; @@ -42,4 +43,10 @@ export interface BookmarkContextType { addBookmark: (text: string, source: string, color: string, notes: UserNote[]) => void; removeBookmark: (id: string) => void; } + +export interface OutputType { + path: string; + operationId: string; + inputPath?: string; +} \ No newline at end of file diff --git a/website/src/components/BookmarksPanel.tsx b/website/src/components/BookmarksPanel.tsx index 9731ad0c..ccbd006f 100644 --- a/website/src/components/BookmarksPanel.tsx +++ b/website/src/components/BookmarksPanel.tsx @@ -36,7 +36,7 @@ const BookmarksPanel: React.FC = () => { }; return ( -
+

Bookmarks diff --git a/website/src/components/DatasetView.tsx b/website/src/components/DatasetView.tsx index 1376e209..e4a38db0 100644 --- a/website/src/components/DatasetView.tsx +++ b/website/src/components/DatasetView.tsx @@ -1,12 +1,48 @@ import { File } from '@/app/types'; -import React from 'react'; +import React, { useState, useEffect } from 'react'; +import { Badge } from '@/components/ui/badge'; +const DatasetView: React.FC<{ file: File | null }> = ({ file }) => { + const [content, setContent] = useState(null); + const [keys, setKeys] = useState([]); -const DatasetView: React.FC<{ file: File | null }> = ({ file }) => ( + useEffect(() => { + const loadFileContent = async () => { + if (file?.path) { + try { + // Fetch file content from the server + const response = await fetch(`/api/readFile?path=${encodeURIComponent(file.path)}`); + if (!response.ok) { + throw new Error('Failed to fetch file content'); + } + const fileContent = await response.text(); + setContent(fileContent); + const jsonContent = JSON.parse(fileContent); + if (Array.isArray(jsonContent) && jsonContent.length > 0) { + setKeys(Object.keys(jsonContent[0])); + } + } catch (error) { + console.error('Error reading or parsing file:', error); + setContent(null); + setKeys([]); + } + } + }; + + loadFileContent(); + }, [file]); + + return (

{file?.name}

+
+

Available keys:

+ {keys.map((key) => ( + {key} + ))} +
-        {file?.content.split('\n').map((line, index) => (
+        {content?.split('\n').map((line, index) => (
           
{index + 1} {line} @@ -15,5 +51,6 @@ const DatasetView: React.FC<{ file: File | null }> = ({ file }) => (
); +}; - export default DatasetView; \ No newline at end of file +export default DatasetView; \ No newline at end of file diff --git a/website/src/components/FileExplorer.tsx b/website/src/components/FileExplorer.tsx index b01a8275..82d344f5 100644 --- a/website/src/components/FileExplorer.tsx +++ b/website/src/components/FileExplorer.tsx @@ -1,51 +1,53 @@ -import React, { useEffect } from 'react'; -import { FileText, Upload } from 'lucide-react'; +import React, { useState } from 'react'; +import { FileText, Upload, Trash2, Eye } from 'lucide-react'; import { Button } from '@/components/ui/button'; import { Input } from '@/components/ui/input'; import { File } from '@/app/types'; +import { + ContextMenu, + ContextMenuContent, + ContextMenuItem, + ContextMenuTrigger, +} from "@/components/ui/context-menu" interface FileExplorerProps { files: File[]; onFileClick: (file: File) => void; onFileUpload: (file: File) => void; - onFilesUpdate: (updatedFiles: File[]) => void; + onFileDelete: (file: File) => void; + currentFile: File | null; + setCurrentFile: (file: File | null) => void; + setShowDatasetView: (show: boolean) => void; } export const FileExplorer: React.FC = ({ files, onFileClick, onFileUpload, - onFilesUpdate, + onFileDelete, + currentFile, + setCurrentFile, + setShowDatasetView, }) => { - useEffect(() => { - fetch('/debate_transcripts.json') - .then(response => response.json()) - .then(data => { - const updatedFiles = files.map(file => - file.name === 'debate_transcripts.json' - ? { ...file, content: JSON.stringify(data, null, 2) } - : file - ); - onFilesUpdate(updatedFiles); - }); - }, []); const handleFileUpload = (event: React.ChangeEvent) => { const uploadedFile = event.target.files?.[0]; if (uploadedFile && uploadedFile.type === 'application/json') { - const reader = new FileReader(); - reader.onload = (e) => { - const content = e.target?.result as string; - onFileUpload({ name: uploadedFile.name, content }); - }; - reader.readAsText(uploadedFile); + const fullPath = uploadedFile.webkitRelativePath || URL.createObjectURL(uploadedFile); + onFileUpload({ name: uploadedFile.name, path: fullPath }); + setCurrentFile({ name: uploadedFile.name, path: fullPath }); } else { alert('Please upload a JSON file'); } }; + const handleFileSelection = (file: File) => { + setCurrentFile(file); + onFileClick(file); + }; + return ( -
+

File Explorer @@ -65,10 +67,28 @@ export const FileExplorer: React.FC = ({

{files.map((file) => ( -
onFileClick(file)}> - - {file.name} -
+ + handleFileSelection(file)} + > + + {file.name} + + + { + handleFileSelection(file); + setShowDatasetView(true); + }}> + + View File + + onFileDelete(file)}> + + Delete + + + ))}
diff --git a/website/src/components/OperationCard.tsx b/website/src/components/OperationCard.tsx index fbf44e29..e80ea79d 100644 --- a/website/src/components/OperationCard.tsx +++ b/website/src/components/OperationCard.tsx @@ -1,4 +1,4 @@ -import React, { useEffect, useState } from 'react'; +import React, { useReducer, useMemo, useCallback, useEffect, useRef, useState } from 'react'; import { Button } from '@/components/ui/button'; import { Input } from '@/components/ui/input'; import { @@ -11,108 +11,87 @@ import { import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogDescription, DialogFooter } from '@/components/ui/dialog'; import { Card, CardHeader, CardContent } from '@/components/ui/card'; import { Textarea } from '@/components/ui/textarea'; -import { ResizableHandle, ResizablePanel, ResizablePanelGroup } from "@/components/ui/resizable"; -import { DragDropContext, Droppable, Draggable, DropResult } from 'react-beautiful-dnd'; +import { Draggable } from 'react-beautiful-dnd'; import { FileText, Maximize2, Minimize2, Plus, Play, GripVertical, Trash2, ChevronDown, Zap, Edit2, Settings } from 'lucide-react'; import { Operation, SchemaItem, SchemaType } from '@/app/types'; import { usePipelineContext } from '@/contexts/PipelineContext'; +import { useToast } from "@/hooks/use-toast" +import { Skeleton } from "@/components/ui/skeleton" +import { debounce } from 'lodash'; +import { Guardrails, OutputSchema, PromptInput } from './operations/args'; +import createOperationComponent from './operations/components'; -const SchemaForm: React.FC<{ - schema: SchemaItem[]; - onUpdate: (newSchema: SchemaItem[]) => void; - level?: number; - isList?: boolean; - }> = ({ schema, onUpdate, level = 0, isList = false }) => { - const addItem = () => { - if (isList) return; - onUpdate([...schema, { key: '', type: 'string' }]); - }; - - const updateItem = (index: number, item: SchemaItem) => { - const newSchema = [...schema]; - newSchema[index] = item; - onUpdate(newSchema); - }; - - const removeItem = (index: number) => { - if (isList) return; - const newSchema = schema.filter((_, i) => i !== index); - onUpdate(newSchema); - }; - - return ( -
- {schema.map((item, index) => ( -
- {!isList && ( - updateItem(index, { ...item, key: e.target.value })} - placeholder="Key" - className="w-1/3 min-w-[150px]" - /> - )} - - {!isList && ( - - )} - {item.type === 'list' && item.subType && ( -
- List type: - updateItem(index, { ...item, subType: newSubSchema[0] })} - level={0} - isList={true} - /> -
- )} - {item.type === 'dict' && item.subType && ( -
- updateItem(index, { ...item, subType: newSubSchema })} - level={level + 1} - /> -
- )} -
- ))} - {!isList && ( - +// Separate components +const OperationHeader: React.FC<{ + name: string; + type: string; + onEdit: (name: string) => void; + onDelete: () => void; + onRunOperation: () => void; + onToggleSettings: () => void; +}> = React.memo(({ name, type, onEdit, onDelete, onRunOperation, onToggleSettings }) => { + const [isEditing, setIsEditing] = useState(false); + const [editedName, setEditedName] = useState(name); + + const handleEditClick = () => { + setIsEditing(true); + setEditedName(name); + }; + + const handleEditComplete = () => { + setIsEditing(false); + onEdit(editedName); + }; + + return ( +
+ {/* Left side buttons */} +
+ + + +
+ + {/* Centered title */} +
+ {isEditing ? ( + setEditedName(e.target.value)} + onBlur={handleEditComplete} + onKeyPress={(e) => e.key === 'Enter' && handleEditComplete()} + className="text-sm font-medium w-1/2 font-mono text-center" + autoFocus + /> + ) : ( + + {name} ({type}) + )}
- ); - }; -// Settings Modal Component + {/* Right side delete button */} + +
+ ); +}); + const SettingsModal: React.FC<{ opName: string; opType: string; @@ -120,8 +99,8 @@ const SettingsModal: React.FC<{ onClose: () => void; otherKwargs: Record; onSettingsSave: (newSettings: Record) => void; -}> = ({ opName, opType, isOpen, onClose, otherKwargs, onSettingsSave }) => { - const [localSettings, setLocalSettings] = useState>( +}> = React.memo(({ opName, opType, isOpen, onClose, otherKwargs, onSettingsSave }) => { + const [localSettings, setLocalSettings] = React.useState>( Object.entries(otherKwargs).map(([key, value], index) => ({ id: index, key, value })) ); @@ -129,8 +108,6 @@ const SettingsModal: React.FC<{ setLocalSettings(Object.entries(otherKwargs).map(([key, value], index) => ({ id: index, key, value }))); }, [otherKwargs]); - if (!isOpen) return null; - const handleSettingsChange = (id: number, newKey: string, newValue: string) => { setLocalSettings(prev => prev.map(setting => setting.id === id ? { ...setting, key: newKey, value: newValue } : setting @@ -162,6 +139,8 @@ const SettingsModal: React.FC<{ new Set(keys).size === keys.length; }; + if (!isOpen) return null; + return ( @@ -200,221 +179,289 @@ const SettingsModal: React.FC<{ ); +}); + +// Action types +type Action = + | { type: 'SET_OPERATION'; payload: Operation } + | { type: 'UPDATE_NAME'; payload: string } + | { type: 'UPDATE_PROMPT'; payload: string } + | { type: 'UPDATE_SCHEMA'; payload: SchemaItem[] } + | { type: 'UPDATE_GUARDRAILS'; payload: string[] } + | { type: 'TOGGLE_EDITING' } + | { type: 'TOGGLE_SCHEMA' } + | { type: 'TOGGLE_GUARDRAILS' } + | { type: 'TOGGLE_SETTINGS' } + | { type: 'SET_RUN_INDEX'; payload: number } + | { type: 'UPDATE_SETTINGS'; payload: Record }; + +// State type +type State = { + operation: Operation | undefined; + isEditing: boolean; + isSchemaExpanded: boolean; + isGuardrailsExpanded: boolean; + isSettingsOpen: boolean; +}; + +// Reducer function +function operationReducer(state: State, action: Action): State { + switch (action.type) { + case 'SET_OPERATION': + return { ...state, operation: action.payload }; + case 'UPDATE_NAME': + return state.operation ? { ...state, operation: { ...state.operation, name: action.payload } } : state; + case 'UPDATE_PROMPT': + return state.operation ? { ...state, operation: { ...state.operation, prompt: action.payload } } : state; + case 'UPDATE_SCHEMA': + return state.operation + ? { + ...state, + operation: { + ...state.operation, + output: { + ...state.operation.output, + schema: action.payload + } + } + } + : state; + + case 'UPDATE_GUARDRAILS': + return state.operation ? { ...state, operation: { ...state.operation, validate: action.payload } } : state; + case 'TOGGLE_EDITING': + return { ...state, isEditing: !state.isEditing }; + case 'TOGGLE_SCHEMA': + return { ...state, isSchemaExpanded: !state.isSchemaExpanded }; + case 'TOGGLE_GUARDRAILS': + return { ...state, isGuardrailsExpanded: !state.isGuardrailsExpanded }; + case 'TOGGLE_SETTINGS': + return { ...state, isSettingsOpen: !state.isSettingsOpen }; + case 'UPDATE_SETTINGS': + return state.operation ? { ...state, operation: { ...state.operation, otherKwargs: action.payload } } : state; + case 'SET_RUN_INDEX': + return state.operation ? { ...state, operation: { ...state.operation, runIndex: action.payload } } : state; + default: + return state; + } +} + +// Initial state +const initialState: State = { + operation: undefined, + isEditing: false, + isSchemaExpanded: false, + isGuardrailsExpanded: false, + isSettingsOpen: false, }; -export const OperationCard: React.FC<{ - operation: Operation; - index: number; - onDelete: (id: string) => void; - onUpdate: (id: string, updatedOperation: Operation) => void; -}> = ({ operation, index, onDelete, onUpdate }) => { - const [schema, setSchema] = useState( - operation.outputSchema - ? Object.entries(operation.outputSchema).map(([key, type]) => ({ key, type: type as SchemaType })) - : [] +// Main component +export const OperationCard: React.FC<{ index: number }> = ({ index }) => { + const [state, dispatch] = useReducer(operationReducer, initialState); + const { operation, isEditing, isSchemaExpanded, isGuardrailsExpanded, isSettingsOpen } = state; + + const { setOutput, isLoadingOutputs, setIsLoadingOutputs, numOpRun, setNumOpRun, currentFile, operations, setOperations, pipelineName, sampleSize, setCost, defaultModel } = usePipelineContext(); + const { toast } = useToast(); + + const operationRef = useRef(operation); + + useEffect(() => { + operationRef.current = operation; + }, [operation]); + + useEffect(() => { + dispatch({ type: 'SET_OPERATION', payload: operations[index] }); + + // Also dispatch the runIndex update + if (operations[index].runIndex !== undefined) { + dispatch({ type: 'SET_RUN_INDEX', payload: operations[index].runIndex }); + } + + }, [operations, index]); + + const schemaItems = useMemo(() => { + return operation?.output?.schema || []; + }, [operation?.output?.schema]); + + const debouncedUpdate = useCallback( + debounce(() => { + if (operationRef.current) { + const updatedOperation = { ...operationRef.current }; + setOperations(prev => prev.map(op => op.id === updatedOperation.id ? updatedOperation : op)); + } + }, 500), + [setOperations] ); - const [isEditing, setIsEditing] = useState(false); - const [editedName, setEditedName] = useState(operation.name); - const [isSchemaExpanded, setIsSchemaExpanded] = useState(schema.length === 0); - const [isGuardrailsExpanded, setIsGuardrailsExpanded] = useState(false); - const [guardrails, setGuardrails] = useState(operation.validate || []); - const { setOutputs, setIsLoadingOutputs } = usePipelineContext(); - const [isSettingsOpen, setIsSettingsOpen] = useState(false); - const [otherKwargs, setOtherKwargs] = useState>(operation.otherKwargs || {}); - const handleSchemaUpdate = (newSchema: SchemaItem[]) => { - setSchema(newSchema); - const newOutputSchema = newSchema.reduce((acc, item) => { - acc[item.key] = item.type; - return acc; - }, {} as Record); - onUpdate(operation.id, { ...operation, outputSchema: newOutputSchema }); - }; + const handleOperationUpdate = useCallback((updatedOperation: Operation) => { + dispatch({ type: 'SET_OPERATION', payload: updatedOperation }); + debouncedUpdate(); + }, [debouncedUpdate]); - const handleNameEdit = () => { - onUpdate(operation.id, { ...operation, name: editedName }); - setIsEditing(false); - }; + const handlePromptChange = useCallback((value: string) => { + dispatch({ type: 'UPDATE_PROMPT', payload: value }); + debouncedUpdate(); + }, [debouncedUpdate]); - const handleRunOperation = async () => { + const handleRunOperation = useCallback(async () => { + if (!operation) return; setIsLoadingOutputs(true); + setNumOpRun(prevNum => { + const newNum = prevNum + 1; + dispatch({ type: 'SET_RUN_INDEX', payload: newNum }); + return newNum; + }); try { - const response = await fetch('/debate_intermediates/extract_themes_and_viewpoints.json'); - let data = await response.json(); - if (data.length > 0 && 'date' in data[0]) { - data.sort((a: { date: string }, b: { date: string }) => new Date(b.date).getTime() - new Date(a.date).getTime()); + const response = await fetch('/api/runPipeline', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + default_model: defaultModel, + data: currentFile, + operations: operations, + operation_id: operation.id, + name: `${pipelineName}.yaml`, + sample_size: sampleSize + }), + }); + + if (!response.ok) { + const errorData = await response.json(); + throw new Error(`Failed to run pipeline: ${errorData.error || response.statusText}`); } - setOutputs(data); + + const { apiResponse, outputPath, inputPath } = await response.json(); + const runCost = apiResponse.cost || 0; + setCost(prevCost => prevCost + runCost); + + toast({ + title: "Operation Cost", + description: `The operation cost $${runCost.toFixed(4)}`, + duration: 3000, + }); + + setOutput({ + path: outputPath, + operationId: operation.id, + inputPath: inputPath + }); } catch (error) { - console.error('Error fetching outputs:', error); + toast({ + title: "Error", + description: error instanceof Error ? error.message : String(error), + variant: "destructive", + }); } finally { setIsLoadingOutputs(false); } - }; - - const handleGuardrailChange = (index: number, value: string) => { - const newGuardrails = [...guardrails]; - newGuardrails[index] = value; - setGuardrails(newGuardrails); - onUpdate(operation.id, { ...operation, validate: newGuardrails }); - }; + }, [operation, currentFile, operations, setOutput, setIsLoadingOutputs, setNumOpRun, toast]); - const addGuardrail = () => { - const newGuardrails = [...guardrails, '']; - setGuardrails(newGuardrails); - onUpdate(operation.id, { ...operation, validate: newGuardrails }); - }; + const handleSettingsSave = useCallback((newSettings: Record) => { + dispatch({ type: 'UPDATE_SETTINGS', payload: newSettings }); + if (operation) { + const updatedOperation = { ...operation, otherKwargs: newSettings }; + setOperations(prev => prev.map(op => op.id === updatedOperation.id ? updatedOperation : op)); + } + }, [operation, setOperations]); - const removeGuardrail = (index: number) => { - const newGuardrails = guardrails.filter((_, i) => i !== index); - setGuardrails(newGuardrails); - onUpdate(operation.id, { ...operation, validate: newGuardrails }); + const handleSchemaUpdate = (newSchema: SchemaItem[]) => { + dispatch({ type: 'UPDATE_SCHEMA', payload: newSchema }); + debouncedUpdate(); }; - const handleSettingsSave = (newSettings: Record) => { - setOtherKwargs(newSettings); - onUpdate(operation.id, { ...operation, otherKwargs: newSettings }); - }; + if (!operation) { + return ; + } return ( +
+
+ {isLoadingOutputs ? ( +
+ ) : operation.runIndex ? ( + <>[{operation.runIndex}] + ) : ( + <>[ ] + )} +
- {(provided) => ( - -
- -
-
- -
- - - -
- {isEditing ? ( - setEditedName(e.target.value)} - onBlur={handleNameEdit} - onKeyPress={(e) => e.key === 'Enter' && handleNameEdit()} - className="text-sm font-medium w-1/2 font-mono" - autoFocus - /> - ) : ( - setIsEditing(true)} - > - {operation.name} ({operation.type}) - - )} - -
- - {operation.llmType === 'LLM' && ( - <> -