Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQLAlchemy: Use JSON type adapter for implementing CrateDB's OBJECT #561

Merged
merged 2 commits into from
Jul 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ name: Tests
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
pull_request: ~
workflow_dispatch:

concurrency:
Expand Down
2 changes: 2 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Unreleased

- SQLAlchemy: Fix SQL statement caching for CrateDB's ``OBJECT`` type.

- SQLAlchemy: Refactor ``OBJECT`` type to use SQLAlchemy's JSON type infrastructure.


2023/04/18 0.31.1
=================
Expand Down
17 changes: 14 additions & 3 deletions src/crate/client/sqlalchemy/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from sqlalchemy.dialects.postgresql.base import PGCompiler
from sqlalchemy.sql import compiler
from sqlalchemy.types import String
from .types import MutableDict, _Craty, Geopoint, Geoshape
from .types import MutableDict, ObjectTypeImpl, Geopoint, Geoshape
from .sa_version import SA_VERSION, SA_1_4


Expand All @@ -41,7 +41,7 @@ def rewrite_update(clauseelement, multiparams, params):

"col['x'] = ?, col['y'] = ?", (1, 2)

by using the `Craty` (`MutableDict`) type.
by using the `ObjectType` (`MutableDict`) type.
The update statement is only rewritten if an item of the MutableDict was
changed.
"""
Expand Down Expand Up @@ -124,7 +124,7 @@ def get_column_specification(self, column, **kwargs):
)

if column.dialect_options['crate'].get('index') is False:
if isinstance(column.type, (Geopoint, Geoshape, _Craty)):
if isinstance(column.type, (Geopoint, Geoshape, ObjectTypeImpl)):
raise sa.exc.CompileError(
"Disabling indexing is not supported for column "
"types OBJECT, GEO_POINT, and GEO_SHAPE"
Expand Down Expand Up @@ -217,6 +217,9 @@ def visit_ARRAY(self, type_, **kw):
"CrateDB doesn't support multidimensional arrays")
return 'ARRAY({0})'.format(self.process(type_.item_type))

def visit_OBJECT(self, type_, **kw):
return "OBJECT"


class CrateCompiler(compiler.SQLCompiler):

Expand All @@ -226,6 +229,14 @@ def visit_getitem_binary(self, binary, operator, **kw):
binary.right.value
)

def visit_json_getitem_op_binary(
self, binary, operator, _cast_applied=False, **kw
):
return "{0}['{1}']".format(
self.process(binary.left, **kw),
binary.right.value
)

def visit_any(self, element, **kw):
return "%s%sANY (%s)" % (
self.process(element.left, **kw),
Expand Down
15 changes: 11 additions & 4 deletions src/crate/client/sqlalchemy/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,17 @@ class CrateDialect(default.DefaultDialect):
insert_returning = True
update_returning = True

def __init__(self, *args, **kwargs):
super(CrateDialect, self).__init__(*args, **kwargs)
# currently our sql parser doesn't support unquoted column names that
# start with _. Adding it here causes sqlalchemy to quote such columns
def __init__(self, **kwargs):
default.DefaultDialect.__init__(self, **kwargs)

# CrateDB does not need `OBJECT` types to be serialized as JSON.
# Corresponding data is forwarded 1:1, and will get marshalled
# by the low-level driver.
self._json_deserializer = lambda x: x
self._json_serializer = lambda x: x

# Currently, our SQL parser doesn't support unquoted column names that
# start with _. Adding it here causes sqlalchemy to quote such columns.
self.identifier_preparer.illegal_initial_characters.add('_')

def initialize(self, connection):
Expand Down
4 changes: 2 additions & 2 deletions src/crate/client/sqlalchemy/tests/compiler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from sqlalchemy.sql import text, Update

from crate.client.sqlalchemy.sa_version import SA_VERSION, SA_1_4, SA_2_0
from crate.client.sqlalchemy.types import Craty
from crate.client.sqlalchemy.types import ObjectType


class SqlAlchemyCompilerTest(TestCase):
Expand All @@ -38,7 +38,7 @@ def setUp(self):
self.metadata = sa.MetaData()
self.mytable = sa.Table('mytable', self.metadata,
sa.Column('name', sa.String),
sa.Column('data', Craty))
sa.Column('data', ObjectType))

self.update = Update(self.mytable).where(text('name=:name'))
self.values = [{'name': 'crate'}]
Expand Down
8 changes: 4 additions & 4 deletions src/crate/client/sqlalchemy/tests/dict_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
except ImportError:
from sqlalchemy.ext.declarative import declarative_base

from crate.client.sqlalchemy.types import Craty, ObjectArray
from crate.client.sqlalchemy.types import ObjectArray, ObjectType
from crate.client.cursor import Cursor


Expand All @@ -47,7 +47,7 @@ def setUp(self):
metadata = sa.MetaData()
self.mytable = sa.Table('mytable', metadata,
sa.Column('name', sa.String),
sa.Column('data', Craty))
sa.Column('data', ObjectType))

def assertSQL(self, expected_str, selectable):
actual_expr = selectable.compile(bind=self.engine)
Expand Down Expand Up @@ -124,7 +124,7 @@ class Character(Base):
__tablename__ = 'characters'
name = sa.Column(sa.String, primary_key=True)
age = sa.Column(sa.Integer)
data = sa.Column(Craty)
data = sa.Column(ObjectType)
data_list = sa.Column(ObjectArray)

session = Session(bind=self.engine)
Expand All @@ -140,7 +140,7 @@ def test_assign_null_to_object_array(self):
self.assertEqual(char_3.data_list, [None])

@patch('crate.client.connection.Cursor', FakeCursor)
def test_assign_to_craty_type_after_commit(self):
def test_assign_to_object_type_after_commit(self):
session, Character = self.set_up_character_and_cursor(
return_value=[('Trillian', None)]
)
Expand Down
4 changes: 2 additions & 2 deletions src/crate/client/sqlalchemy/tests/match_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
except ImportError:
from sqlalchemy.ext.declarative import declarative_base

from crate.client.sqlalchemy.types import Craty
from crate.client.sqlalchemy.types import ObjectType
from crate.client.sqlalchemy.predicates import match
from crate.client.cursor import Cursor

Expand Down Expand Up @@ -60,7 +60,7 @@ def set_up_character_and_session(self):
class Character(Base):
__tablename__ = 'characters'
name = sa.Column(sa.String, primary_key=True)
info = sa.Column(Craty)
info = sa.Column(ObjectType)

session = Session(bind=self.engine)
return session, Character
Expand Down
28 changes: 27 additions & 1 deletion src/crate/client/sqlalchemy/tests/query_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def setup_data(self):
self.session.execute(sa.text("REFRESH TABLE testdrive.characters;"))

@skipIf(SA_VERSION < SA_1_4, "On SA13, the 'ResultProxy' object has no attribute 'scalar_one'")
def test_object_multiple_select(self):
def test_object_multiple_select_legacy(self):
"""
The SQLAlchemy implementation of CrateDB's `OBJECT` type offers indexed
access to the instance's content in form of a dictionary. Thus, it must
Expand All @@ -85,6 +85,8 @@ def test_object_multiple_select(self):
This test verifies that two subsequent `SELECT` statements are translated
well, and don't trip on incorrect SQL compiled statement caching.
This variant uses direct value matching on the `OBJECT`s attribute.
"""
self.setup_data()
Character = self.Character
Expand All @@ -97,6 +99,30 @@ def test_object_multiple_select(self):
result = self.session.execute(selectable).scalar_one().data
self.assertEqual({"y": 2}, result)

@skipIf(SA_VERSION < SA_1_4, "On SA13, the 'ResultProxy' object has no attribute 'scalar_one'")
def test_object_multiple_select_modern(self):
"""
The SQLAlchemy implementation of CrateDB's `OBJECT` type offers indexed
access to the instance's content in form of a dictionary. Thus, it must
not use `cache_ok = True` on its implementation, i.e. this part of the
compiled SQL clause must not be cached.
This test verifies that two subsequent `SELECT` statements are translated
well, and don't trip on incorrect SQL compiled statement caching.
This variant uses comparator method matching on the `OBJECT`s attribute.
"""
self.setup_data()
Character = self.Character

selectable = sa.select(Character).where(Character.data['x'].as_integer() == 1)
result = self.session.execute(selectable).scalar_one().data
self.assertEqual({"x": 1}, result)

selectable = sa.select(Character).where(Character.data['y'].as_integer() == 2)
result = self.session.execute(selectable).scalar_one().data
self.assertEqual({"y": 2}, result)

@skipIf(SA_VERSION < SA_1_4, "On SA13, the 'ResultProxy' object has no attribute 'scalar_one'")
def test_objectarray_multiple_select(self):
"""
Expand Down
35 changes: 33 additions & 2 deletions src/crate/client/sqlalchemy/tests/warnings_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@


class SqlAlchemyWarningsTest(TestCase, ExtraAssertions):
"""
Verify a few `DeprecationWarning` spots.
https://docs.python.org/3/library/warnings.html#testing-warnings
"""

@skipIf(SA_VERSION >= SA_1_4, "There is no deprecation warning for "
"SQLAlchemy 1.3 on higher versions")
def test_sa13_deprecation_warning(self):
"""
Verify that a `DeprecationWarning` is issued when running SQLAlchemy 1.3.
https://docs.python.org/3/library/warnings.html#testing-warnings
"""
with warnings.catch_warnings(record=True) as w:

Expand All @@ -31,3 +34,31 @@ def test_sa13_deprecation_warning(self):
self.assertEqual(len(w), 1)
self.assertIsSubclass(w[-1].category, DeprecationWarning)
self.assertIn("SQLAlchemy 1.3 is effectively EOL.", str(w[-1].message))

def test_craty_object_deprecation_warning(self):
"""
Verify that a `DeprecationWarning` is issued when accessing the deprecated
module variables `Craty`, and `Object`. The new type is called `ObjectType`.
"""

with warnings.catch_warnings(record=True) as w:

# Import the deprecated symbol.
from crate.client.sqlalchemy.types import Craty # noqa: F401

# Verify details of the deprecation warning.
self.assertEqual(len(w), 1)
self.assertIsSubclass(w[-1].category, DeprecationWarning)
self.assertIn("Craty is deprecated and will be removed in future releases. "
"Please use ObjectType instead.", str(w[-1].message))

with warnings.catch_warnings(record=True) as w:

# Import the deprecated symbol.
from crate.client.sqlalchemy.types import Object # noqa: F401

# Verify details of the deprecation warning.
self.assertEqual(len(w), 1)
self.assertIsSubclass(w[-1].category, DeprecationWarning)
self.assertIn("Object is deprecated and will be removed in future releases. "
"Please use ObjectType instead.", str(w[-1].message))
30 changes: 19 additions & 11 deletions src/crate/client/sqlalchemy/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# However, if you have executed another commercial license agreement
# with Crate these terms will supersede the license and you may use the
# software solely pursuant to the terms of the relevant commercial agreement.
import warnings

import sqlalchemy.types as sqltypes
from sqlalchemy.sql import operators, expression
Expand Down Expand Up @@ -131,24 +132,31 @@ def __eq__(self, other):
return dict.__eq__(self, other)


class _Craty(sqltypes.UserDefinedType):
class ObjectTypeImpl(sqltypes.UserDefinedType, sqltypes.JSON):

__visit_name__ = "OBJECT"

cache_ok = False
Comment on lines +135 to 139
Copy link
Member Author

@amotl amotl Jul 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still need to use cache_ok = False, otherwise things go south on indexed dictionary access. So, it does not improve the situation wrt. GH-559. I will ask for advise on the upstream issue tracker about this detail.

none_as_null = False

class Comparator(sqltypes.TypeEngine.Comparator):

def __getitem__(self, key):
return default_comparator._binary_operate(self.expr,
operators.getitem,
key)
# Designated name to refer to. `Object` is too ambiguous.
ObjectType = MutableDict.as_mutable(ObjectTypeImpl)
Comment on lines +143 to +144
Copy link
Member Author

@amotl amotl Jul 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SQLAlchemy also offers a MutableDict implementation. It looks like using that would resolve some other specializations of the CrateDB dialect, which are a bit anomalous, as we discovered on behalf of the discussion at sqlalchemy/sqlalchemy#5915.

However, it did not work out of the box. SQLAlchemy's MutableDict, or associated code paths, did not seem to support nested object access well. Maybe indeed this is one of the essential specializations the CrateDB dialect has to offer.

This topic will need to be re-visited. /cc GH-425


def get_col_spec(self):
return 'OBJECT'
# Backward-compatibility aliases.
_deprecated_Craty = ObjectType

Check notice

Code scanning / CodeQL

Unused global variable

The global variable '_deprecated_Craty' is not used.
_deprecated_Object = ObjectType

Check notice

Code scanning / CodeQL

Unused global variable

The global variable '_deprecated_Object' is not used.

type = MutableDict
comparator_factory = Comparator
# https://www.lesinskis.com/deprecating-module-scope-variables.html
deprecated_names = ["Craty", "Object"]


Object = Craty = MutableDict.as_mutable(_Craty)
def __getattr__(name):
if name in deprecated_names:
warnings.warn(f"{name} is deprecated and will be removed in future releases. "
f"Please use ObjectType instead.", DeprecationWarning)
return globals()[f"_deprecated_{name}"]
raise AttributeError(f"module {__name__} has no attribute {name}")


class Any(expression.ColumnElement):
Expand Down