Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lineage : DB boilerplating #191

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions docetl/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,14 @@
from rich.status import Status
from rich.traceback import install

from docetl.dataset import Dataset, create_parsing_tool_map
from docetl.dataset import Dataset
from docetl.operations import get_operation
from docetl.operations.base import BaseOperation
from docetl.operations.utils import flush_cache
from docetl.helper.cache import flush_cache
from docetl.optimizers.join_optimizer import JoinOptimizer
from docetl.optimizers.map_optimizer import MapOptimizer
from docetl.optimizers.reduce_optimizer import ReduceOptimizer
from docetl.optimizers.utils import LLMClient
from docetl.config_wrapper import ConfigWrapper

install(show_locals=True)

Expand Down
2 changes: 1 addition & 1 deletion docetl/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import typer

from docetl.operations.utils import clear_cache as cc
from docetl.helper.cache import clear_cache as cc
from docetl.runner import DSLRunner

from dotenv import load_dotenv
Expand Down
4 changes: 2 additions & 2 deletions docetl/config_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import os
from docetl.console import get_console
from docetl.utils import load_config
from typing import Any, Dict, List, Optional, Tuple, Union
from docetl.operations.utils import APIWrapper
from typing import Dict, Optional
from docetl.helper.api_wrapper import APIWrapper
import pyrate_limiter
from inspect import isawaitable
import math
Expand Down
81 changes: 76 additions & 5 deletions docetl/console.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import os
from typing import Any, Optional
from rich.console import Console
from io import StringIO
import threading
import queue
from io import StringIO
from multiprocessing.util import DEFAULT_LOGGING_FORMAT
from typing import override, Optional, Union, Any

from rich.console import Console, JustifyMethod
from rich.style import Style

from docetl.helper.database import DatabaseUtil


class ThreadSafeConsole(Console):
Expand All @@ -13,12 +17,14 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.input_event = threading.Event()
self.input_value = None
self.conn: Optional[DatabaseUtil] = None
self.is_write_to_db = False

def print(self, *args, **kwargs):
    """Forward every positional and keyword argument to ``Console.print``."""
    parent_print = super().print
    parent_print(*args, **kwargs)

def input(
self, prompt="", *, markup: bool = True, emoji: bool = True, **kwargs
self, prompt="", *, markup: bool = True, emoji: bool = True, **kwargs
) -> str:
if prompt:
self.print(prompt, markup=markup, emoji=emoji, end="")
Expand All @@ -36,6 +42,68 @@ def post_input(self, value: str):
self.input_value = value
self.input_event.set()

def with_db_logging_enabled(
    self, conn: DatabaseUtil, table_name: str, schema: dict
) -> "ThreadSafeConsole":
    """Turn on database logging for this console and return the console.

    Stores the connection, target table name and schema on the instance and
    sets ``is_write_to_db`` so that ``log`` also writes each message through
    ``conn.log_to_db``. Returns ``self`` so the call can be chained.
    """
    self.conn, self.is_write_to_db = conn, True
    self.schema, self.table_name = schema, table_name
    return self

@override
def log(
    self,
    *objects: Any,
    sep: str = " ",
    end: str = "\n",
    style: Optional[Union[str, Style]] = None,
    justify: Optional[JustifyMethod] = None,
    emoji: Optional[bool] = None,
    markup: Optional[bool] = None,
    highlight: Optional[bool] = None,
    log_locals: bool = False,
    _stack_offset: int = 1,
) -> None:
    """Log to the console and, when enabled, mirror the message to the database.

    All arguments are forwarded to ``Console.log``. If database logging has
    been switched on via ``with_db_logging_enabled``, the objects are
    stringified, joined with ``sep`` and handed to ``conn.log_to_db`` along
    with the configured schema and table name.
    """
    # ``typing.override`` is a bare marker decorator: it takes the method
    # itself, not the parent method, so it must be applied without arguments.
    super().log(
        *objects,
        sep=sep,
        end=end,
        style=style,
        justify=justify,
        emoji=emoji,
        markup=markup,
        highlight=highlight,
        log_locals=log_locals,
        # This wrapper adds one frame between the real caller and
        # Console.log; bump the offset so the reported file/line is the
        # caller's rather than this method's.
        _stack_offset=_stack_offset + 1,
    )
    if self.is_write_to_db and self.conn is not None:
        # str(*objects) raises TypeError for two or more objects; build the
        # message by joining each object's string form with ``sep`` instead.
        message = sep.join(str(obj) for obj in objects)
        self.conn.log_to_db(
            log_data=message,
            schema=self.schema,
            table_name=self.table_name,
        )


class DocETLLog(Console):
    """Rich ``Console`` whose ``log`` can also persist messages to a database.

    Database logging is off by default; call ``with_db_logging_enabled`` to
    attach a ``DatabaseUtil`` connection, a table name and a schema.
    """

    def __init__(self, *args, **kwargs):
        """Create the console with database logging disabled."""
        super().__init__(*args, **kwargs)
        self.conn: Optional[DatabaseUtil] = None
        self.is_write_to_db = False
        # Defined up front so ``log`` never hits a missing attribute.
        self.schema: Optional[dict] = None
        self.table_name: Optional[str] = None

    def with_db_logging_enabled(
        self, conn: DatabaseUtil, table_name: str, schema: dict
    ) -> "DocETLLog":
        """Enable database logging and return ``self`` for chaining."""
        self.conn = conn
        self.is_write_to_db = True
        self.schema = schema
        self.table_name = table_name
        return self

    # ``typing.override`` is a bare marker decorator: it takes the method
    # itself, not the parent method, so it must be applied without arguments.
    @override
    def log(
        self,
        *objects: Any,
        sep: str = " ",
        end: str = "\n",
        style: Optional[Union[str, Style]] = None,
        justify: Optional[JustifyMethod] = None,
        emoji: Optional[bool] = None,
        markup: Optional[bool] = None,
        highlight: Optional[bool] = None,
        log_locals: bool = False,
        _stack_offset: int = 1,
    ) -> None:
        """Log to the console and, when enabled, mirror the message to the database.

        All arguments are forwarded to ``Console.log``. If database logging
        is enabled, the objects are stringified, joined with ``sep`` and
        passed to ``conn.log_to_db`` with the configured schema and table.
        """
        super().log(
            *objects,
            sep=sep,
            end=end,
            style=style,
            justify=justify,
            emoji=emoji,
            markup=markup,
            highlight=highlight,
            log_locals=log_locals,
            # This wrapper adds one frame between the real caller and
            # Console.log; bump the offset so the reported file/line is the
            # caller's rather than this method's.
            _stack_offset=_stack_offset + 1,
        )
        if self.is_write_to_db and self.conn is not None:
            # TODO: the payload probably needs to be a dictionary matching the
            # schema; user-defined schemas are problematic, so a strict schema
            # may be required.
            # NOTE(review): the original made a bare
            # DatabaseUtil.DEFAULT_LOG_SCHEMA() call here and discarded the
            # result; it was dropped as an apparent leftover - confirm it has
            # no required side effects.
            # str(*objects) raises TypeError for two or more objects; join
            # each object's string form with ``sep`` instead.
            message = sep.join(str(obj) for obj in objects)
            self.conn.log_to_db(
                log_data=message,
                schema=self.schema,
                table_name=self.table_name,
            )

def get_console():
# Check if we're running with a frontend
Expand All @@ -50,4 +118,7 @@ def get_console():
return Console()


# Override the log function so that it accepts a SQLite database object and writes logs to the database.


DOCETL_CONSOLE = get_console()
Empty file added docetl/helper/__init__.py
Empty file.
Loading