From a1820fd6debad7d72cfb644e62dd443e12693aed Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 5 Jul 2023 11:34:01 +0200 Subject: [PATCH] SQLAlchemy: Use JSON type adapter for implementing CrateDB's `OBJECT` SQLAlchemy's `sqltypes.JSON` provides a facade for vendor-specific JSON types. Since it supports JSON SQL operations, it only works on backends that have an actual JSON type, which are currently PostgreSQL, MySQL, SQLite, and Microsoft SQL Server. This patch starts leveraging the same infrastructure, thus bringing corresponding interfaces to the CrateDB dialect. The major difference is that it will not actually do any JSON marshalling, but propagate corresponding data structures 1:1, because within CrateDB's SQL, `OBJECT`s do not need to be serialized into JSON strings before transfer. --- CHANGES.txt | 2 ++ src/crate/client/sqlalchemy/compiler.py | 17 +++++++-- src/crate/client/sqlalchemy/dialect.py | 15 +++++--- .../client/sqlalchemy/tests/compiler_test.py | 4 +-- .../client/sqlalchemy/tests/dict_test.py | 8 ++--- .../client/sqlalchemy/tests/match_test.py | 4 +-- .../client/sqlalchemy/tests/query_caching.py | 28 ++++++++++++++- .../client/sqlalchemy/tests/warnings_test.py | 35 +++++++++++++++++-- src/crate/client/sqlalchemy/types.py | 30 ++++++++++------ 9 files changed, 114 insertions(+), 29 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 8b41400c..0bbb7465 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -15,6 +15,8 @@ Unreleased - SQLAlchemy: Fix SQL statement caching for CrateDB's ``OBJECT`` type. +- SQLAlchemy: Refactor ``OBJECT`` type to use SQLAlchemy's JSON type infrastructure. + 2023/04/18 0.31.1 ================= diff --git a/src/crate/client/sqlalchemy/compiler.py b/src/crate/client/sqlalchemy/compiler.py index 3ac6fa57..3965c9e1 100644 --- a/src/crate/client/sqlalchemy/compiler.py +++ b/src/crate/client/sqlalchemy/compiler.py @@ -26,7 +26,7 @@ from sqlalchemy.dialects.postgresql.base import PGCompiler from sqlalchemy.sql import compiler from sqlalchemy.types import String -from .types import MutableDict, _Craty, Geopoint, Geoshape +from .types import MutableDict, ObjectTypeImpl, Geopoint, Geoshape from .sa_version import SA_VERSION, SA_1_4 @@ -41,7 +41,7 @@ def rewrite_update(clauseelement, multiparams, params): "col['x'] = ?, col['y'] = ?", (1, 2) - by using the `Craty` (`MutableDict`) type. + by using the `ObjectType` (`MutableDict`) type. The update statement is only rewritten if an item of the MutableDict was changed. """ @@ -124,7 +124,7 @@ def get_column_specification(self, column, **kwargs): ) if column.dialect_options['crate'].get('index') is False: - if isinstance(column.type, (Geopoint, Geoshape, _Craty)): + if isinstance(column.type, (Geopoint, Geoshape, ObjectTypeImpl)): raise sa.exc.CompileError( "Disabling indexing is not supported for column " "types OBJECT, GEO_POINT, and GEO_SHAPE" @@ -217,6 +217,9 @@ def visit_ARRAY(self, type_, **kw): "CrateDB doesn't support multidimensional arrays") return 'ARRAY({0})'.format(self.process(type_.item_type)) + def visit_OBJECT(self, type_, **kw): + return "OBJECT" + class CrateCompiler(compiler.SQLCompiler): @@ -226,6 +229,14 @@ def visit_getitem_binary(self, binary, operator, **kw): binary.right.value ) + def visit_json_getitem_op_binary( + self, binary, operator, _cast_applied=False, **kw + ): + return "{0}['{1}']".format( + self.process(binary.left, **kw), + binary.right.value + ) + def visit_any(self, element, **kw): return "%s%sANY (%s)" % ( self.process(element.left, **kw), diff --git a/src/crate/client/sqlalchemy/dialect.py b/src/crate/client/sqlalchemy/dialect.py index 5737f994..e992d41a 100644 --- a/src/crate/client/sqlalchemy/dialect.py +++ b/src/crate/client/sqlalchemy/dialect.py @@ -183,10 +183,17 @@ class CrateDialect(default.DefaultDialect): insert_returning = True update_returning = True - def __init__(self, *args, **kwargs): - super(CrateDialect, self).__init__(*args, **kwargs) - # currently our sql parser doesn't support unquoted column names that - # start with _. Adding it here causes sqlalchemy to quote such columns + def __init__(self, **kwargs): + default.DefaultDialect.__init__(self, **kwargs) + + # CrateDB does not need `OBJECT` types to be serialized as JSON. + # Corresponding data is forwarded 1:1, and will get marshalled + # by the low-level driver. + self._json_deserializer = lambda x: x + self._json_serializer = lambda x: x + + # Currently, our SQL parser doesn't support unquoted column names that + # start with _. Adding it here causes sqlalchemy to quote such columns. self.identifier_preparer.illegal_initial_characters.add('_') def initialize(self, connection): diff --git a/src/crate/client/sqlalchemy/tests/compiler_test.py b/src/crate/client/sqlalchemy/tests/compiler_test.py index 31ed7f3c..17612232 100644 --- a/src/crate/client/sqlalchemy/tests/compiler_test.py +++ b/src/crate/client/sqlalchemy/tests/compiler_test.py @@ -27,7 +27,7 @@ from sqlalchemy.sql import text, Update from crate.client.sqlalchemy.sa_version import SA_VERSION, SA_1_4, SA_2_0 -from crate.client.sqlalchemy.types import Craty +from crate.client.sqlalchemy.types import ObjectType class SqlAlchemyCompilerTest(TestCase): @@ -38,7 +38,7 @@ def setUp(self): self.metadata = sa.MetaData() self.mytable = sa.Table('mytable', self.metadata, sa.Column('name', sa.String), - sa.Column('data', Craty)) + sa.Column('data', ObjectType)) self.update = Update(self.mytable).where(text('name=:name')) self.values = [{'name': 'crate'}] diff --git a/src/crate/client/sqlalchemy/tests/dict_test.py b/src/crate/client/sqlalchemy/tests/dict_test.py index 2324591e..9695882b 100644 --- a/src/crate/client/sqlalchemy/tests/dict_test.py +++ b/src/crate/client/sqlalchemy/tests/dict_test.py @@ -31,7 +31,7 @@ except ImportError: from sqlalchemy.ext.declarative import declarative_base -from crate.client.sqlalchemy.types import Craty, ObjectArray +from crate.client.sqlalchemy.types import ObjectArray, ObjectType from crate.client.cursor import Cursor @@ -47,7 +47,7 @@ def setUp(self): metadata = sa.MetaData() self.mytable = sa.Table('mytable', metadata, sa.Column('name', sa.String), - sa.Column('data', Craty)) + sa.Column('data', ObjectType)) def assertSQL(self, expected_str, selectable): actual_expr = selectable.compile(bind=self.engine) @@ -124,7 +124,7 @@ class Character(Base): __tablename__ = 'characters' name = sa.Column(sa.String, primary_key=True) age = sa.Column(sa.Integer) - data = sa.Column(Craty) + data = sa.Column(ObjectType) data_list = sa.Column(ObjectArray) session = Session(bind=self.engine) @@ -140,7 +140,7 @@ def test_assign_null_to_object_array(self): self.assertEqual(char_3.data_list, [None]) @patch('crate.client.connection.Cursor', FakeCursor) - def test_assign_to_craty_type_after_commit(self): + def test_assign_to_object_type_after_commit(self): session, Character = self.set_up_character_and_cursor( return_value=[('Trillian', None)] ) diff --git a/src/crate/client/sqlalchemy/tests/match_test.py b/src/crate/client/sqlalchemy/tests/match_test.py index fdd5b7d0..735709c3 100644 --- a/src/crate/client/sqlalchemy/tests/match_test.py +++ b/src/crate/client/sqlalchemy/tests/match_test.py @@ -30,7 +30,7 @@ except ImportError: from sqlalchemy.ext.declarative import declarative_base -from crate.client.sqlalchemy.types import Craty +from crate.client.sqlalchemy.types import ObjectType from crate.client.sqlalchemy.predicates import match from crate.client.cursor import Cursor @@ -60,7 +60,7 @@ def set_up_character_and_session(self): class Character(Base): __tablename__ = 'characters' name = sa.Column(sa.String, primary_key=True) - info = sa.Column(Craty) + info = sa.Column(ObjectType) session = Session(bind=self.engine) return session, Character diff --git a/src/crate/client/sqlalchemy/tests/query_caching.py b/src/crate/client/sqlalchemy/tests/query_caching.py index fb4bdec3..037d6423 100644 --- a/src/crate/client/sqlalchemy/tests/query_caching.py +++ b/src/crate/client/sqlalchemy/tests/query_caching.py @@ -76,7 +76,7 @@ def setup_data(self): self.session.execute(sa.text("REFRESH TABLE testdrive.characters;")) @skipIf(SA_VERSION < SA_1_4, "On SA13, the 'ResultProxy' object has no attribute 'scalar_one'") - def test_object_multiple_select(self): + def test_object_multiple_select_legacy(self): """ The SQLAlchemy implementation of CrateDB's `OBJECT` type offers indexed access to the instance's content in form of a dictionary. Thus, it must @@ -85,6 +85,8 @@ def test_object_multiple_select(self): This test verifies that two subsequent `SELECT` statements are translated well, and don't trip on incorrect SQL compiled statement caching. + + This variant uses direct value matching on the `OBJECT`s attribute. """ self.setup_data() Character = self.Character @@ -97,6 +99,30 @@ def test_object_multiple_select(self): result = self.session.execute(selectable).scalar_one().data self.assertEqual({"y": 2}, result) + @skipIf(SA_VERSION < SA_1_4, "On SA13, the 'ResultProxy' object has no attribute 'scalar_one'") + def test_object_multiple_select_modern(self): + """ + The SQLAlchemy implementation of CrateDB's `OBJECT` type offers indexed + access to the instance's content in form of a dictionary. Thus, it must + not use `cache_ok = True` on its implementation, i.e. this part of the + compiled SQL clause must not be cached. + + This test verifies that two subsequent `SELECT` statements are translated + well, and don't trip on incorrect SQL compiled statement caching. + + This variant uses comparator method matching on the `OBJECT`s attribute. + """ + self.setup_data() + Character = self.Character + + selectable = sa.select(Character).where(Character.data['x'].as_integer() == 1) + result = self.session.execute(selectable).scalar_one().data + self.assertEqual({"x": 1}, result) + + selectable = sa.select(Character).where(Character.data['y'].as_integer() == 2) + result = self.session.execute(selectable).scalar_one().data + self.assertEqual({"y": 2}, result) + @skipIf(SA_VERSION < SA_1_4, "On SA13, the 'ResultProxy' object has no attribute 'scalar_one'") def test_objectarray_multiple_select(self): """ diff --git a/src/crate/client/sqlalchemy/tests/warnings_test.py b/src/crate/client/sqlalchemy/tests/warnings_test.py index c300ad8c..80023005 100644 --- a/src/crate/client/sqlalchemy/tests/warnings_test.py +++ b/src/crate/client/sqlalchemy/tests/warnings_test.py @@ -8,14 +8,17 @@ class SqlAlchemyWarningsTest(TestCase, ExtraAssertions): + """ + Verify a few `DeprecationWarning` spots. + + https://docs.python.org/3/library/warnings.html#testing-warnings + """ @skipIf(SA_VERSION >= SA_1_4, "There is no deprecation warning for " "SQLAlchemy 1.3 on higher versions") def test_sa13_deprecation_warning(self): """ Verify that a `DeprecationWarning` is issued when running SQLAlchemy 1.3. - - https://docs.python.org/3/library/warnings.html#testing-warnings """ with warnings.catch_warnings(record=True) as w: @@ -31,3 +34,31 @@ def test_sa13_deprecation_warning(self): self.assertEqual(len(w), 1) self.assertIsSubclass(w[-1].category, DeprecationWarning) self.assertIn("SQLAlchemy 1.3 is effectively EOL.", str(w[-1].message)) + + def test_craty_object_deprecation_warning(self): + """ + Verify that a `DeprecationWarning` is issued when accessing the deprecated + module variables `Craty`, and `Object`. The new type is called `ObjectType`. + """ + + with warnings.catch_warnings(record=True) as w: + + # Import the deprecated symbol. + from crate.client.sqlalchemy.types import Craty # noqa: F401 + + # Verify details of the deprecation warning. + self.assertEqual(len(w), 1) + self.assertIsSubclass(w[-1].category, DeprecationWarning) + self.assertIn("Craty is deprecated and will be removed in future releases. " + "Please use ObjectType instead.", str(w[-1].message)) + + with warnings.catch_warnings(record=True) as w: + + # Import the deprecated symbol. + from crate.client.sqlalchemy.types import Object # noqa: F401 + + # Verify details of the deprecation warning. + self.assertEqual(len(w), 1) + self.assertIsSubclass(w[-1].category, DeprecationWarning) + self.assertIn("Object is deprecated and will be removed in future releases. " + "Please use ObjectType instead.", str(w[-1].message)) diff --git a/src/crate/client/sqlalchemy/types.py b/src/crate/client/sqlalchemy/types.py index 587858ac..f9899d92 100644 --- a/src/crate/client/sqlalchemy/types.py +++ b/src/crate/client/sqlalchemy/types.py @@ -18,6 +18,7 @@ # However, if you have executed another commercial license agreement # with Crate these terms will supersede the license and you may use the # software solely pursuant to the terms of the relevant commercial agreement. +import warnings import sqlalchemy.types as sqltypes from sqlalchemy.sql import operators, expression @@ -131,24 +132,31 @@ def __eq__(self, other): return dict.__eq__(self, other) -class _Craty(sqltypes.UserDefinedType): +class ObjectTypeImpl(sqltypes.UserDefinedType, sqltypes.JSON): + + __visit_name__ = "OBJECT" + cache_ok = False + none_as_null = False - class Comparator(sqltypes.TypeEngine.Comparator): - def __getitem__(self, key): - return default_comparator._binary_operate(self.expr, - operators.getitem, - key) +# Designated name to refer to. `Object` is too ambiguous. +ObjectType = MutableDict.as_mutable(ObjectTypeImpl) - def get_col_spec(self): - return 'OBJECT' +# Backward-compatibility aliases. +_deprecated_Craty = ObjectType +_deprecated_Object = ObjectType - type = MutableDict - comparator_factory = Comparator +# https://www.lesinskis.com/deprecating-module-scope-variables.html +deprecated_names = ["Craty", "Object"] -Object = Craty = MutableDict.as_mutable(_Craty) +def __getattr__(name): + if name in deprecated_names: + warnings.warn(f"{name} is deprecated and will be removed in future releases. " + f"Please use ObjectType instead.", DeprecationWarning) + return globals()[f"_deprecated_{name}"] + raise AttributeError(f"module {__name__} has no attribute {name}") class Any(expression.ColumnElement):