Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

QueryBuilder: Implementation of contains Filter Operator applied to JSON Fields for SQLite backend #6619

Open
wants to merge 25 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
a9165f9
add tests to `contains` filter operator on PostgreSQL backend
dependabot[bot] Nov 6, 2024
ab88f00
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 19, 2024
518cc6b
Merge branch 'main' into test-json-contains
rabbull Nov 19, 2024
ec36aae
temp
rabbull Nov 19, 2024
474dd26
add tests for nested arrays
rabbull Nov 19, 2024
a759d43
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 19, 2024
5b7b62b
update
rabbull Nov 19, 2024
e23ec32
custom function
rabbull Nov 19, 2024
e530f03
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 19, 2024
6df03e3
cleanup
rabbull Nov 19, 2024
84c50f6
catchup
rabbull Nov 19, 2024
f787404
fix compilation error on py39
rabbull Nov 20, 2024
36c7102
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 20, 2024
dcd3cf9
add benchmark
rabbull Nov 20, 2024
d293d18
ignore benchmark results
rabbull Nov 20, 2024
a34623f
Merge branch 'test-json-contains' into sqlite-json-contains
rabbull Nov 20, 2024
079cc32
remove requires_psql marks for sqlite tests
rabbull Nov 20, 2024
598f821
temp
rabbull Nov 21, 2024
93ad037
add benchmark
rabbull Nov 23, 2024
93966cd
Merge branch 'sqlite-json-contains' of github.com:rabbull/aiida-core …
rabbull Nov 23, 2024
9293c67
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 23, 2024
180bec0
Merge branch 'main' of github.com:rabbull/aiida-core into sqlite-json…
rabbull Nov 26, 2024
edfe2b2
Merge branch 'main' of github.com:rabbull/aiida-core into sqlite-json…
rabbull Nov 26, 2024
2189a81
migrate sqlite filter tests to orm
rabbull Nov 26, 2024
ffa0b11
add comment on impl for psql of_type
rabbull Nov 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/aiida/storage/sqlite_zip/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,7 @@ def _cast_json_type(comparator: JSON.Comparator, value: Any) -> Tuple[ColumnElem
return case((type_filter, casted_entity.ilike(value, escape='\\')), else_=False)

if operator == 'contains':
# to-do, see: https://github.com/sqlalchemy/sqlalchemy/discussions/7836
raise NotImplementedError('The operator `contains` is not implemented for SQLite-based storage plugins.')
return func.json_contains(database_entity, json.dumps(value))

if operator == 'has_key':
return (
Expand Down
31 changes: 31 additions & 0 deletions src/aiida/storage/sqlite_zip/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,43 @@
cursor.close()


def _contains(lhs: Union[dict, list], rhs: Union[dict, list]):
rabbull marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(lhs, dict) and isinstance(rhs, dict):
for key in rhs:
if key not in lhs or not _contains(lhs[key], rhs[key]):
return False
return True
elif isinstance(lhs, list) and isinstance(rhs, list):
for item in rhs:
if not any(_contains(element, item) for element in lhs):
return False
return True
else:
return lhs == rhs


def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], rhs: Union[str, bytes, bytearray, dict, list]):
try:
if isinstance(lhs, (str, bytes, bytearray)):
lhs = json.loads(lhs)
if isinstance(rhs, (str, bytes, bytearray)):
rhs = json.loads(rhs)
except json.JSONDecodeError:
return 0

Check warning on line 73 in src/aiida/storage/sqlite_zip/utils.py

View check run for this annotation

Codecov / codecov/patch

src/aiida/storage/sqlite_zip/utils.py#L72-L73

Added lines #L72 - L73 were not covered by tests
return int(_contains(lhs, rhs))


def register_json_contains(dbapi_connection, _):
dbapi_connection.create_function('json_contains', 2, _json_contains)


def create_sqla_engine(path: Union[str, Path], *, enforce_foreign_keys: bool = True, **kwargs) -> Engine:
"""Create a new engine instance."""
engine = create_engine(f'sqlite:///{path}', json_serializer=json.dumps, json_deserializer=json.loads, **kwargs)
event.listen(engine, 'connect', sqlite_case_sensitive_like)
if enforce_foreign_keys:
event.listen(engine, 'connect', sqlite_enforce_foreign_keys)
event.listen(engine, 'connect', register_json_contains)
return engine


Expand Down
160 changes: 160 additions & 0 deletions tests/storage/sqlite/test_orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,166 @@ def test_qb_json_filters(filters, matches):
assert qbuilder.count() == matches


class TestJsonFilters:
@pytest.mark.parametrize(
'data,filters,is_match',
(
# contains different types of element
({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1]}}, True),
({'arr': [1, '2', None]}, {'attributes.arr': {'contains': ['2']}}, True),
({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [None]}}, True),
# contains multiple elements of various types
({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1, None]}}, True),
# contains non-exist elements
({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [114514]}}, False),
# contains empty set
({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True),
({'arr': []}, {'attributes.arr': {'contains': []}}, True),
# nested arrays
({'arr': [[1, 0], [0, 2]]}, {'attributes.arr': {'contains': [[1, 0]]}}, True),
({'arr': [[2, 3], [0, 1], []]}, {'attributes.arr': {'contains': [[1, 0]]}}, True), # order doesn't matter
({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[4]]}}, False),
# TODO: the test below is supposed to pass but currently doesn't
# ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[2]]}}, False),
# negations
({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False),
({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False),
({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True),
({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True),
# TODO: these pass, but why? are these behaviors expected?
# non-exist `attr_key`s
({'foo': []}, {'attributes.arr': {'contains': []}}, False),
# ({'foo': []}, {'attributes.arr': {'!contains': []}}, False),
),
ids=json.dumps,
)
@pytest.mark.usefixtures('aiida_profile_clean')
@pytest.mark.requires_psql
def test_json_filters_contains_arrays(self, data, filters, is_match):
"""Test QueryBuilder filter `contains` for JSON array fields"""
profile = SqliteTempBackend.create_profile(debug=False)
backend = SqliteTempBackend(profile)
Dict(data, backend=backend).store()
qb = QueryBuilder(backend=backend).append(Dict, filters=filters)
assert qb.count() in {0, 1}
found = qb.count() == 1
assert found == is_match

@pytest.mark.parametrize(
'data,filters,is_match',
(
# contains different types of values
(
{
'dict': {
'k1': 1,
'k2': '2',
'k3': None,
}
},
{'attributes.dict': {'contains': {'k1': 1}}},
True,
),
(
{
'dict': {
'k1': 1,
'k2': '2',
'k3': None,
}
},
{'attributes.dict': {'contains': {'k1': 1, 'k2': '2'}}},
True,
),
(
{
'dict': {
'k1': 1,
'k2': '2',
'k3': None,
}
},
{'attributes.dict': {'contains': {'k3': None}}},
True,
),
# contains empty set
(
{
'dict': {
'k1': 1,
'k2': '2',
'k3': None,
}
},
{'attributes.dict': {'contains': {}}},
True,
),
# doesn't contain non-exist entries
(
{
'dict': {
'k1': 1,
'k2': '2',
'k3': None,
}
},
{'attributes.dict': {'contains': {'k1': 1, 'k': 'v'}}},
False,
),
# negations
(
{
'dict': {
'k1': 1,
'k2': '2',
'k3': None,
}
},
{'attributes.dict': {'!contains': {'k1': 1}}},
False,
),
(
{
'dict': {
'k1': 1,
'k2': '2',
'k3': None,
}
},
{'attributes.dict': {'!contains': {'k1': 1, 'k': 'v'}}},
True,
),
(
{
'dict': {
'k1': 1,
'k2': '2',
'k3': None,
}
},
{'attributes.dict': {'!contains': {}}},
False,
),
# TODO: these pass, but why? are these behaviors expected?
# non-exist `attr_key`s
({'map': {}}, {'attributes.dict': {'contains': {}}}, False),
# ({'map': {}}, {'attributes.dict': {'!contains': {}}}, False),
),
ids=json.dumps,
)
@pytest.mark.usefixtures('aiida_profile_clean')
@pytest.mark.requires_psql
def test_json_filters_contains_object(self, data, filters, is_match):
"""Test QueryBuilder filter `contains` for JSON object fields"""
profile = SqliteTempBackend.create_profile(debug=False)
backend = SqliteTempBackend(profile)
Dict(data, backend=backend).store()
qb = QueryBuilder(backend=backend).append(Dict, filters=filters)
assert qb.count() in {0, 1}
found = qb.count() == 1
assert found == is_match


@pytest.mark.parametrize(
'filters,matches',
(
Expand Down
Loading