Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use statement #936

Merged
merged 5 commits into from
Aug 18, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions evadb/catalog/catalog_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,21 @@
)
from evadb.catalog.sql_config import IDENTIFIER_COLUMN
from evadb.configuration.configuration_manager import ConfigurationManager
from evadb.executor.executor_utils import ExecutorError
from evadb.expression.function_expression import FunctionExpression
from evadb.expression.tuple_value_expression import TupleValueExpression
from evadb.parser.create_statement import ColConstraintInfo, ColumnDefinition
from evadb.utils.generic_utils import get_str_hash, remove_directory_contents


def generate_sqlalchemy_conn_str(engine: str, params: Dict[str, str]):
if engine == "postgres":
conn_str = f"""postgresql://{params["user"]}:{params["password"]}@{params["host"]}:{params["port"]}/{params["database"]}"""
else:
raise ExecutorError(f"Native engine: {engine} is not currently supported")
return conn_str


def is_video_table(table: TableCatalogEntry):
return table.table_type == TableType.VIDEO_DATA

Expand Down
3 changes: 3 additions & 0 deletions evadb/executor/plan_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
from evadb.executor.sample_executor import SampleExecutor
from evadb.executor.seq_scan_executor import SequentialScanExecutor
from evadb.executor.show_info_executor import ShowInfoExecutor
from evadb.executor.sqlalchemy_executor import SQLAlchemyExecutor
from evadb.executor.storage_executor import StorageExecutor
from evadb.executor.union_executor import UnionExecutor
from evadb.executor.vector_index_scan_executor import VectorIndexScanExecutor
Expand Down Expand Up @@ -152,6 +153,8 @@ def _build_execution_tree(
executor_node = VectorIndexScanExecutor(db=self._db, node=plan)
elif plan_opr_type == PlanOprType.DELETE:
executor_node = DeleteExecutor(db=self._db, node=plan)
elif plan_opr_type == PlanOprType.SQLALCHEMY:
executor_node = SQLAlchemyExecutor(db=self._db, node=plan)

# EXPLAIN does not need to build execution tree for its children
if plan_opr_type != PlanOprType.EXPLAIN:
Expand Down
54 changes: 54 additions & 0 deletions evadb/executor/sqlalchemy_executor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Iterator

import pandas as pd
from sqlalchemy import create_engine

from evadb.catalog.catalog_utils import generate_sqlalchemy_conn_str
from evadb.database import EvaDBDatabase
from evadb.executor.abstract_executor import AbstractExecutor
from evadb.models.storage.batch import Batch
from evadb.plan_nodes.native_plan import SQLAlchemyPlan


class SQLAlchemyExecutor(AbstractExecutor):
"""
Execute SQL through SQLAlchemy engine.
"""

def __init__(self, db: EvaDBDatabase, node: SQLAlchemyPlan):
super().__init__(db, node)
self._database_name = node.database_name
self._query_string = node.query_string

def exec(self, *args, **kwargs) -> Iterator[Batch]:
db_catalog_entry = self.db.catalog().get_database_catalog_entry(
self._database_name
)

conn_str = generate_sqlalchemy_conn_str(
db_catalog_entry.engine,
db_catalog_entry.params,
)

engine = create_engine(conn_str)

with engine.connect() as con:
if "SELECT" in self._query_string or "select" in self._query_string:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to make this robust for cases where select is used for creating tables etc. Also, if query contains Select etc

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the output for CREATE TABLE from sqlalchemy? Is it possible to simply execute the query and yield whatever output sqlalchemy yields in dataframe type?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this idea

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sqlalchemy/sqlalchemy#5433 (comment)

Based on what I see, it is still true for this API for latest SQLAlchemy. There is no rows returned besides SELECT.

yield Batch(pd.read_sql(self._query_string, engine))
else:
con.execute(self._query_string)
yield Batch(pd.DataFrame({"status": ["Ok"]}))
35 changes: 35 additions & 0 deletions evadb/optimizer/operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class OperatorType(IntEnum):
LOGICAL_APPLY_AND_MERGE = auto()
LOGICAL_EXTRACT_OBJECT = auto()
LOGICAL_VECTOR_INDEX_SCAN = auto()
LOGICAL_USE = auto()
LOGICALDELIMITER = auto()


Expand Down Expand Up @@ -1239,3 +1240,37 @@ def __hash__(self) -> int:
self.search_query_expr,
)
)


class LogicalUse(Operator):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also bypass optimizer for use? Just to simplify the code

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will push the code to take care of this

def __init__(self, database_name: str, query_string: str, children: List = None):
super().__init__(OperatorType.LOGICAL_USE, children)
self._database_name = database_name
self._query_string = query_string

@property
def database_name(self):
return self._database_name

@property
def query_string(self):
return self._query_string

def __eq__(self, other):
is_subtree_equal = super().__eq__(other)
if not isinstance(other, LogicalUse):
return False
return (
is_subtree_equal
and self.database_name == other.database_name
and self.query_string == other.query_string
)

def __hash__(self) -> int:
return hash(
(
super().__hash__(),
self.database_name,
self.query_string,
)
)
18 changes: 18 additions & 0 deletions evadb/optimizer/rules/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
LogicalSample,
LogicalShow,
LogicalUnion,
LogicalUse,
LogicalVectorIndexScan,
Operator,
OperatorType,
Expand All @@ -93,6 +94,7 @@
from evadb.plan_nodes.lateral_join_plan import LateralJoinPlan
from evadb.plan_nodes.limit_plan import LimitPlan
from evadb.plan_nodes.load_data_plan import LoadDataPlan
from evadb.plan_nodes.native_plan import SQLAlchemyPlan
from evadb.plan_nodes.orderby_plan import OrderByPlan
from evadb.plan_nodes.rename_plan import RenamePlan
from evadb.plan_nodes.seq_scan_plan import SeqScanPlan
Expand Down Expand Up @@ -1331,5 +1333,21 @@ def apply(self, before: LogicalProject, context: OptimizerContext):
yield exchange_plan


class LogicalUseToPhysical(Rule):
def __init__(self):
pattern = Pattern(OperatorType.LOGICAL_USE)
super().__init__(RuleType.LOGICAL_USE_TO_PHYSICAL, pattern)

def promise(self):
return Promise.LOGICAL_USE_TO_PHYSICAL

def check(self, before: LogicalUse, context: OptimizerContext):
return True

def apply(self, before: LogicalUse, context: OptimizerContext):
# TODO: convert to different physical plan based on USE operator
yield SQLAlchemyPlan(before.database_name, before.query_string)


# IMPLEMENTATION RULES END
##############################################
2 changes: 2 additions & 0 deletions evadb/optimizer/rules/rules_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class RuleType(Flag):
LOGICAL_CREATE_INDEX_TO_VECTOR_INDEX = auto()
LOGICAL_APPLY_AND_MERGE_TO_PHYSICAL = auto()
LOGICAL_VECTOR_INDEX_SCAN_TO_PHYSICAL = auto()
LOGICAL_USE_TO_PHYSICAL = auto()
IMPLEMENTATION_DELIMITER = auto()

NUM_RULES = auto()
Expand Down Expand Up @@ -124,6 +125,7 @@ class Promise(IntEnum):
LOGICAL_CREATE_INDEX_TO_VECTOR_INDEX = auto()
LOGICAL_APPLY_AND_MERGE_TO_PHYSICAL = auto()
LOGICAL_VECTOR_INDEX_SCAN_TO_PHYSICAL = auto()
LOGICAL_USE_TO_PHYSICAL = auto()

# IMPLEMENTATION DELIMITER
IMPLEMENTATION_DELIMITER = auto()
Expand Down
2 changes: 2 additions & 0 deletions evadb/optimizer/rules/rules_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
LogicalRenameToPhysical,
LogicalShowToPhysical,
LogicalUnionToPhysical,
LogicalUseToPhysical,
LogicalVectorIndexScanToPhysical,
PushDownFilterThroughApplyAndMerge,
PushDownFilterThroughJoin,
Expand Down Expand Up @@ -112,6 +113,7 @@ def __init__(self, config: ConfigurationManager):
LogicalExplainToPhysical(),
LogicalCreateIndexToVectorIndex(),
LogicalVectorIndexScanToPhysical(),
LogicalUseToPhysical(),
]

# These rules are enabled only if
Expand Down
11 changes: 11 additions & 0 deletions evadb/optimizer/statement_to_opr_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
LogicalSample,
LogicalShow,
LogicalUnion,
LogicalUse,
)
from evadb.optimizer.optimizer_utils import (
column_definition_to_udf_io,
Expand All @@ -55,6 +56,7 @@
from evadb.parser.statement import AbstractStatement
from evadb.parser.table_ref import TableRef
from evadb.parser.types import UDFType
from evadb.parser.use_statement import UseStatement
from evadb.utils.logging_manager import logger


Expand Down Expand Up @@ -318,6 +320,13 @@ def visit_delete(self, statement: DeleteTableStatement):
)
self._plan = delete_opr

def visit_use(self, statement: UseStatement):
use_opr = LogicalUse(
statement.database_name,
statement.query_string,
)
self._plan = use_opr

def visit(self, statement: AbstractStatement):
"""Based on the instance of the statement the corresponding
visit is called.
Expand Down Expand Up @@ -348,6 +357,8 @@ def visit(self, statement: AbstractStatement):
self.visit_create_index(statement)
elif isinstance(statement, DeleteTableStatement):
self.visit_delete(statement)
elif isinstance(statement, UseStatement):
self.visit_use(statement)
return self._plan

@property
Expand Down
14 changes: 13 additions & 1 deletion evadb/parser/evadb.lark
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

start: (sql_statement? ";")+

sql_statement: ddl_statement | dml_statement | utility_statement
sql_statement: ddl_statement | dml_statement | utility_statement | context_statement

ddl_statement: create_database | create_table | create_index | create_udf
| drop_database | drop_table | drop_udf | drop_index | rename_table
Expand All @@ -12,6 +12,8 @@ dml_statement: select_statement | insert_statement | update_statement

utility_statement: describe_statement | show_statement | help_statement | explain_statement

context_statement: use_statement

// Data Definition Language

// Create statements
Expand Down Expand Up @@ -172,11 +174,19 @@ explain_statement: EXPLAIN explainable_statement

explainable_statement : select_statement | insert_statement | update_statement | delete_statement | create_table

// Context Statements

use_statement: USE database_name "{" query_string "}" // One shortcoming that query string cannot have parenthesis
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the limitation of the lark or ? Why we can not have parenthesis?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will remove this comment. The native query can have parenthesis after I change to use curly bracket if that makes sense

// now
USE postgres {
   // some query
}

// before
USE postgres (
    // some query
)

If I use the parenthesis and the native query has arbitrary parentheses as well, I have some trouble of coming up with a grammar that works for all cases.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me take care of it


// Common Clauses

// DB Objects

query_string: QUERY_STRING

full_id: uid dotted_id?

database_name: full_id

table_name: full_id

Expand Down Expand Up @@ -360,6 +370,7 @@ PARAMETERS: "PARAMETERS"i
PRIMARY: "PRIMARY"i
REFERENCES: "REFERENCES"i
RENAME: "RENAME"i
USE: "USE"i
SAMPLE: "SAMPLE"i
IFRAMES: "IFRAMES"i
AUDIORATE: "AUDIORATE"i
Expand Down Expand Up @@ -556,6 +567,7 @@ ID_LITERAL: /[A-Za-z_$0-9]*?[A-Za-z_$]+?[A-Za-z_$0-9]*/
DQUOTA_STRING: /"[^";]*"/
SQUOTA_STRING: /'[^';]*'/
BQUOTA_STRING: /`[^'`]*`/
QUERY_STRING: /[^{};]+/
DEC_DIGIT: /[0-9]/

// LARK
Expand Down
2 changes: 2 additions & 0 deletions evadb/parser/lark_visitor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from evadb.parser.lark_visitor._select_statement import Select
from evadb.parser.lark_visitor._show_statements import Show
from evadb.parser.lark_visitor._table_sources import TableSources
from evadb.parser.lark_visitor._use_statement import Use

# To add new functionality to the parser, create a new file under
# the lark_visitor directory, and implement a new class which
Expand Down Expand Up @@ -70,6 +71,7 @@ class LarkInterpreter(
Show,
Explain,
Delete,
Use,
):
def __init__(self, query):
super().__init__()
Expand Down
28 changes: 28 additions & 0 deletions evadb/parser/lark_visitor/_use_statement.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from lark import Tree

from evadb.parser.use_statement import UseStatement


class Use:
def use_statement(self, tree):
for child in tree.children:
if isinstance(child, Tree):
if child.data == "database_name":
database_name = self.visit(child)
if child.data == "query_string":
query_string = self.visit(child)
return UseStatement(database_name, query_string)
1 change: 1 addition & 0 deletions evadb/parser/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class StatementType(EvaDBEnum):
EXPLAIN # noqa: F821
CREATE_INDEX # noqa: F821
CREATE_DATABASE # noqa: F821
USE # noqa: F821
# add other types


Expand Down
Loading