Skip to content

Commit

Permalink
feat(query): support map_keys and map_values function (#15291)
Browse files Browse the repository at this point in the history
  • Loading branch information
b41sh authored Apr 22, 2024
1 parent d21a3b9 commit 316b635
Show file tree
Hide file tree
Showing 6 changed files with 272 additions and 0 deletions.
37 changes: 37 additions & 0 deletions src/query/functions/src/scalars/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use databend_common_expression::types::GenericType;
use databend_common_expression::types::MapType;
use databend_common_expression::types::NullType;
use databend_common_expression::types::NullableType;
use databend_common_expression::vectorize_1_arg;
use databend_common_expression::vectorize_with_builder_2_arg;
use databend_common_expression::FunctionDomain;
use databend_common_expression::FunctionRegistry;
Expand Down Expand Up @@ -119,4 +120,40 @@ pub fn register(registry: &mut FunctionRegistry) {
}
),
);

// `map_keys` overload for the empty map: takes `EmptyMapType`, produces
// `EmptyArrayType`. There is nothing to compute, so the domain is `Full` and
// evaluation always yields the unit scalar.
registry.register_1_arg_core::<EmptyMapType, EmptyArrayType, _, _>(
    "map_keys",
    |_, _| FunctionDomain::Full,
    |_, _| Value::Scalar(()),
);

// `map_keys` overload for `Map(T0, T1)`: returns the map's key column as an
// `Array(T0)`. Registered through `register_passthrough_nullable_1_arg`.
registry.register_passthrough_nullable_1_arg::<MapType<GenericType<0>, GenericType<1>>, ArrayType<GenericType<0>>, _, _>(
    "map_keys",
    |_, domain| {
        // The output domain is the key half of the map domain. Borrow the
        // pair with `as_ref` so only the key domain is cloned; the value
        // domain is never copied.
        FunctionDomain::Domain(
            domain.as_ref().map(|(key_domain, _)| key_domain.clone())
        )
    },
    vectorize_1_arg::<MapType<GenericType<0>, GenericType<1>>, ArrayType<GenericType<0>>>(
        // Each map value exposes its keys and values as separate fields;
        // the keys field is the result array.
        |map, _| map.keys
    ),
);

// `map_values` overload for the empty map: takes `EmptyMapType`, produces
// `EmptyArrayType`. There is nothing to compute, so the domain is `Full` and
// evaluation always yields the unit scalar.
registry.register_1_arg_core::<EmptyMapType, EmptyArrayType, _, _>(
    "map_values",
    |_, _| FunctionDomain::Full,
    |_, _| Value::Scalar(()),
);

// `map_values` overload for `Map(T0, T1)`: returns the map's value column as
// an `Array(T1)`. Registered through `register_passthrough_nullable_1_arg`.
registry.register_passthrough_nullable_1_arg::<MapType<GenericType<0>, GenericType<1>>, ArrayType<GenericType<1>>, _, _>(
    "map_values",
    |_, domain| {
        // The output domain is the value half of the map domain. Borrow the
        // pair with `as_ref` so only the value domain is cloned; the key
        // domain is never copied.
        FunctionDomain::Domain(
            domain.as_ref().map(|(_, val_domain)| val_domain.clone())
        )
    },
    vectorize_1_arg::<MapType<GenericType<0>, GenericType<1>>, ArrayType<GenericType<1>>>(
        // Each map value exposes its keys and values as separate fields;
        // the values field is the result array.
        |map, _| map.values
    ),
);
}
62 changes: 62 additions & 0 deletions src/query/functions/tests/it/scalars/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ fn test_map() {

test_create(file);
test_get(file);
test_map_keys(file);
test_map_values(file);
}

fn test_create(file: &mut impl Write) {
Expand Down Expand Up @@ -82,3 +84,63 @@ fn test_get(file: &mut impl Write) {
("v2", StringType::from_data(vec!["v3", "v4"])),
]);
}

/// Golden-file cases for `map_keys`: literal maps first, then a map built
/// from input columns.
fn test_map_keys(file: &mut impl Write) {
    // Map literals that need no input columns: empty map, string keys,
    // integer keys, and a map whose values contain NULLs.
    for literal_expr in [
        "map_keys({})",
        "map_keys({'a':1,'b':2,'c':3})",
        "map_keys({1:'a',2:'b',3:'c'})",
        "map_keys({'a':NULL,'b':2,'c':NULL})",
    ] {
        run_ast(file, literal_expr, &[]);
    }

    // a_col..c_col supply the keys; d_col..f_col supply the values and are
    // built with a validity mask.
    let input_columns = [
        ("a_col", StringType::from_data(vec!["a", "b", "c"])),
        ("b_col", StringType::from_data(vec!["d", "e", "f"])),
        ("c_col", StringType::from_data(vec!["x", "y", "z"])),
        (
            "d_col",
            StringType::from_data_with_validity(vec!["v1", "v2", "v3"], vec![true, true, true]),
        ),
        (
            "e_col",
            StringType::from_data_with_validity(vec!["v4", "v5", ""], vec![true, true, false]),
        ),
        (
            "f_col",
            StringType::from_data_with_validity(vec!["v6", "", "v7"], vec![true, false, true]),
        ),
    ];
    let column_expr = "map_keys(map([a_col, b_col, c_col], [d_col, e_col, f_col]))";
    run_ast(file, column_expr, &input_columns);
}

/// Golden-file cases for `map_values`: literal maps first, then a map built
/// from input columns.
fn test_map_values(file: &mut impl Write) {
    // Map literals that need no input columns: empty map, integer values,
    // string values, and a map whose values contain NULLs.
    for literal_expr in [
        "map_values({})",
        "map_values({'a':1,'b':2,'c':3})",
        "map_values({1:'a',2:'b',3:'c'})",
        "map_values({'a':NULL,'b':2,'c':NULL})",
    ] {
        run_ast(file, literal_expr, &[]);
    }

    // a_col..c_col supply the keys; d_col..f_col supply the values and are
    // built with a validity mask.
    let input_columns = [
        ("a_col", StringType::from_data(vec!["a", "b", "c"])),
        ("b_col", StringType::from_data(vec!["d", "e", "f"])),
        ("c_col", StringType::from_data(vec!["x", "y", "z"])),
        (
            "d_col",
            StringType::from_data_with_validity(vec!["v1", "v2", "v3"], vec![true, true, true]),
        ),
        (
            "e_col",
            StringType::from_data_with_validity(vec!["v4", "v5", ""], vec![true, true, false]),
        ),
        (
            "f_col",
            StringType::from_data_with_validity(vec!["v6", "", "v7"], vec![true, false, true]),
        ),
    ];
    let column_expr = "map_values(map([a_col, b_col, c_col], [d_col, e_col, f_col]))";
    run_ast(file, column_expr, &input_columns);
}
Original file line number Diff line number Diff line change
Expand Up @@ -2433,6 +2433,12 @@ Functions overloads:
1 map(Array(Nothing) NULL, Array(Nothing) NULL) :: Map(Nothing) NULL
2 map(Array(T0), Array(T1)) :: Map(T0, T1)
3 map(Array(T0) NULL, Array(T1) NULL) :: Map(T0, T1) NULL
0 map_keys(Map(Nothing)) :: Array(Nothing)
1 map_keys(Map(T0, T1)) :: Array(T0)
2 map_keys(Map(T0, T1) NULL) :: Array(T0) NULL
0 map_values(Map(Nothing)) :: Array(Nothing)
1 map_values(Map(T0, T1)) :: Array(T1)
2 map_values(Map(T0, T1) NULL) :: Array(T1) NULL
0 md5(String) :: String
1 md5(String NULL) :: String NULL
0 minus(Variant, Int32) :: Variant
Expand Down
126 changes: 126 additions & 0 deletions src/query/functions/tests/it/scalars/testdata/map.txt
Original file line number Diff line number Diff line change
Expand Up @@ -212,3 +212,129 @@ evaluation (internal):
+--------+------------------------------------------------------------------------------------------------------+


ast : map_keys({})
raw expr : map_keys(map(array(), array()))
checked expr : map_keys<Map(Nothing)>(map<Array(Nothing), Array(Nothing)>(array<>(), array<>()))
optimized expr : [] :: Array(Nothing)
output type : Array(Nothing)
output domain : []
output : []


ast : map_keys({'a':1,'b':2,'c':3})
raw expr : map_keys(map(array('a', 'b', 'c'), array(1, 2, 3)))
checked expr : map_keys<T0=String, T1=UInt8><Map(T0, T1)>(map<T0=String, T1=UInt8><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>("a", "b", "c"), array<T0=UInt8><T0, T0, T0>(1_u8, 2_u8, 3_u8)))
optimized expr : ['a', 'b', 'c']
output type : Array(String)
output domain : [{"a"..="c"}]
output : ['a', 'b', 'c']


ast : map_keys({1:'a',2:'b',3:'c'})
raw expr : map_keys(map(array(1, 2, 3), array('a', 'b', 'c')))
checked expr : map_keys<T0=UInt8, T1=String><Map(T0, T1)>(map<T0=UInt8, T1=String><Array(T0), Array(T1)>(array<T0=UInt8><T0, T0, T0>(1_u8, 2_u8, 3_u8), array<T0=String><T0, T0, T0>("a", "b", "c")))
optimized expr : [1, 2, 3]
output type : Array(UInt8)
output domain : [{1..=3}]
output : [1, 2, 3]


ast : map_keys({'a':NULL,'b':2,'c':NULL})
raw expr : map_keys(map(array('a', 'b', 'c'), array(NULL, 2, NULL)))
checked expr : map_keys<T0=String, T1=UInt8 NULL><Map(T0, T1)>(map<T0=String, T1=UInt8 NULL><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>("a", "b", "c"), array<T0=UInt8 NULL><T0, T0, T0>(CAST(NULL AS UInt8 NULL), CAST(2_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL))))
optimized expr : ['a', 'b', 'c']
output type : Array(String)
output domain : [{"a"..="c"}]
output : ['a', 'b', 'c']


ast : map_keys(map([a_col, b_col, c_col], [d_col, e_col, f_col]))
raw expr : map_keys(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)))
checked expr : map_keys<T0=String, T1=String NULL><Map(T0, T1)>(map<T0=String, T1=String NULL><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>(a_col, b_col, c_col), array<T0=String NULL><T0, T0, T0>(d_col, e_col, f_col)))
evaluation:
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-----------------+
| | a_col | b_col | c_col | d_col | e_col | f_col | Output |
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-----------------+
| Type | String | String | String | String NULL | String NULL | String NULL | Array(String) |
| Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown |
| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | ['a', 'd', 'x'] |
| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | ['b', 'e', 'y'] |
| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | ['c', 'f', 'z'] |
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-----------------+
evaluation (internal):
+--------+-------------------------------------------------------------------------------------------------------------------------------------+
| Column | Data |
+--------+-------------------------------------------------------------------------------------------------------------------------------------+
| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } |
| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } |
| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } |
| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } |
| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } |
| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } |
| Output | ArrayColumn { values: StringColumn { data: 0x61647862657963667a, offsets: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] }, offsets: [0, 3, 6, 9] } |
+--------+-------------------------------------------------------------------------------------------------------------------------------------+


ast : map_values({})
raw expr : map_values(map(array(), array()))
checked expr : map_values<Map(Nothing)>(map<Array(Nothing), Array(Nothing)>(array<>(), array<>()))
optimized expr : [] :: Array(Nothing)
output type : Array(Nothing)
output domain : []
output : []


ast : map_values({'a':1,'b':2,'c':3})
raw expr : map_values(map(array('a', 'b', 'c'), array(1, 2, 3)))
checked expr : map_values<T0=String, T1=UInt8><Map(T0, T1)>(map<T0=String, T1=UInt8><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>("a", "b", "c"), array<T0=UInt8><T0, T0, T0>(1_u8, 2_u8, 3_u8)))
optimized expr : [1, 2, 3]
output type : Array(UInt8)
output domain : [{1..=3}]
output : [1, 2, 3]


ast : map_values({1:'a',2:'b',3:'c'})
raw expr : map_values(map(array(1, 2, 3), array('a', 'b', 'c')))
checked expr : map_values<T0=UInt8, T1=String><Map(T0, T1)>(map<T0=UInt8, T1=String><Array(T0), Array(T1)>(array<T0=UInt8><T0, T0, T0>(1_u8, 2_u8, 3_u8), array<T0=String><T0, T0, T0>("a", "b", "c")))
optimized expr : ['a', 'b', 'c']
output type : Array(String)
output domain : [{"a"..="c"}]
output : ['a', 'b', 'c']


ast : map_values({'a':NULL,'b':2,'c':NULL})
raw expr : map_values(map(array('a', 'b', 'c'), array(NULL, 2, NULL)))
checked expr : map_values<T0=String, T1=UInt8 NULL><Map(T0, T1)>(map<T0=String, T1=UInt8 NULL><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>("a", "b", "c"), array<T0=UInt8 NULL><T0, T0, T0>(CAST(NULL AS UInt8 NULL), CAST(2_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL))))
optimized expr : [NULL, 2, NULL]
output type : Array(UInt8 NULL)
output domain : [{0..=2} ∪ {NULL}]
output : [NULL, 2, NULL]


ast : map_values(map([a_col, b_col, c_col], [d_col, e_col, f_col]))
raw expr : map_values(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)))
checked expr : map_values<T0=String, T1=String NULL><Map(T0, T1)>(map<T0=String, T1=String NULL><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>(a_col, b_col, c_col), array<T0=String NULL><T0, T0, T0>(d_col, e_col, f_col)))
evaluation:
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------+
| | a_col | b_col | c_col | d_col | e_col | f_col | Output |
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------+
| Type | String | String | String | String NULL | String NULL | String NULL | Array(String NULL) |
| Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown |
| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | ['v1', 'v4', 'v6'] |
| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | ['v2', 'v5', NULL] |
| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | ['v3', NULL, 'v7'] |
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------+
evaluation (internal):
+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Column | Data |
+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } |
| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } |
| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } |
| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } |
| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } |
| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } |
| Output | ArrayColumn { values: NullableColumn { column: StringColumn { data: 0x7631763476367632763576337637, offsets: [0, 2, 4, 6, 8, 10, 10, 12, 12, 14] }, validity: [0b01011111, 0b_______1] }, offsets: [0, 3, 6, 9] } |
+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+


Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# End-to-end checks for map_keys / map_values.
# Start from a clean, dedicated database so reruns are not affected by leftovers.
statement ok
DROP DATABASE IF EXISTS map_func_test

statement ok
CREATE DATABASE IF NOT EXISTS map_func_test

statement ok
USE map_func_test

# An empty map literal yields an empty array from both functions.
query TT
select map_keys({}), map_values({})
----
[] []

# Literal map with a NULL value: keys are all returned, the NULL value is kept in the values array.
query TT
select map_keys({'k1':1,'k2':2,'k3':null}), map_values({'k1':1,'k2':2,'k3':null})
----
['k1','k2','k3'] [1,2,NULL]

# col1 is a non-nullable map with nullable string values; col2 is a nullable map with nullable int values.
statement ok
create table t(col1 Map(String, String Null) Not Null, col2 Map(String, Int Null) Null)

# Three rows: maps with NULL values, maps of different sizes, and an empty map paired with a NULL map.
statement ok
insert into t values({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}), ({}, null)

# Keys per row; the NULL map in col2 produces NULL rather than an empty array.
query TT
select map_keys(col1), map_keys(col2) from t
----
['k1','k2','k3'] ['a','b']
['k5','k6'] ['d','e','f']
[] NULL

# Values per row; NULL entries stay in place, and the NULL map in col2 produces NULL.
query TT
select map_values(col1), map_values(col2) from t
----
['v1','v2',NULL] [10,20]
['v5','v6'] [40,NULL,50]
[] NULL

# Clean up the database created for this test.
statement ok
DROP DATABASE map_func_test

0 comments on commit 316b635

Please sign in to comment.