From 316b635c9882a40e896a24681fa189c0432cc410 Mon Sep 17 00:00:00 2001 From: baishen Date: Mon, 22 Apr 2024 22:47:12 +0800 Subject: [PATCH] feat(query): support `map_keys` and `map_values` function (#15291) --- src/query/functions/src/scalars/map.rs | 37 +++++ src/query/functions/tests/it/scalars/map.rs | 62 +++++++++ .../it/scalars/testdata/function_list.txt | 6 + .../tests/it/scalars/testdata/map.txt | 126 ++++++++++++++++++ .../{02_0072_prql.test => 02_0073_prql.test} | 0 .../query/functions/02_0074_function_map.test | 41 ++++++ 6 files changed, 272 insertions(+) rename tests/sqllogictests/suites/query/functions/{02_0072_prql.test => 02_0073_prql.test} (100%) create mode 100644 tests/sqllogictests/suites/query/functions/02_0074_function_map.test diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index 69766737dd02..235a791abc6c 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -22,6 +22,7 @@ use databend_common_expression::types::GenericType; use databend_common_expression::types::MapType; use databend_common_expression::types::NullType; use databend_common_expression::types::NullableType; +use databend_common_expression::vectorize_1_arg; use databend_common_expression::vectorize_with_builder_2_arg; use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionRegistry; @@ -119,4 +120,40 @@ pub fn register(registry: &mut FunctionRegistry) { } ), ); + + registry.register_1_arg_core::<EmptyMapType, EmptyArrayType, _, _>( + "map_keys", + |_, _| FunctionDomain::Full, + |_, _| Value::Scalar(()), + ); + + registry.register_passthrough_nullable_1_arg::<MapType<GenericType<0>, GenericType<1>>, ArrayType<GenericType<0>>, _, _>( + "map_keys", + |_, domain| { + FunctionDomain::Domain( + domain.clone().map(|(key_domain, _)| key_domain.clone()) + ) + }, + vectorize_1_arg::<MapType<GenericType<0>, GenericType<1>>, ArrayType<GenericType<0>>>( + |map, _| map.keys + ), + ); + + registry.register_1_arg_core::<EmptyMapType, EmptyArrayType, _, _>( + "map_values", + |_, _| FunctionDomain::Full, + |_, _| 
Value::Scalar(()), + ); + + registry.register_passthrough_nullable_1_arg::<MapType<GenericType<0>, GenericType<1>>, ArrayType<GenericType<1>>, _, _>( + "map_values", + |_, domain| { + FunctionDomain::Domain( + domain.clone().map(|(_, val_domain)| val_domain.clone()) + ) + }, + vectorize_1_arg::<MapType<GenericType<0>, GenericType<1>>, ArrayType<GenericType<1>>>( + |map, _| map.values + ), + ); } diff --git a/src/query/functions/tests/it/scalars/map.rs b/src/query/functions/tests/it/scalars/map.rs index f65091000a2d..24a6b1088aee 100644 --- a/src/query/functions/tests/it/scalars/map.rs +++ b/src/query/functions/tests/it/scalars/map.rs @@ -27,6 +27,8 @@ fn test_map() { test_create(file); test_get(file); + test_map_keys(file); + test_map_values(file); } fn test_create(file: &mut impl Write) { @@ -82,3 +84,63 @@ fn test_get(file: &mut impl Write) { ("v2", StringType::from_data(vec!["v3", "v4"])), ]); } + +fn test_map_keys(file: &mut impl Write) { + run_ast(file, "map_keys({})", &[]); + run_ast(file, "map_keys({'a':1,'b':2,'c':3})", &[]); + run_ast(file, "map_keys({1:'a',2:'b',3:'c'})", &[]); + run_ast(file, "map_keys({'a':NULL,'b':2,'c':NULL})", &[]); + + let columns = [ + ("a_col", StringType::from_data(vec!["a", "b", "c"])), + ("b_col", StringType::from_data(vec!["d", "e", "f"])), + ("c_col", StringType::from_data(vec!["x", "y", "z"])), + ( + "d_col", + StringType::from_data_with_validity(vec!["v1", "v2", "v3"], vec![true, true, true]), + ), + ( + "e_col", + StringType::from_data_with_validity(vec!["v4", "v5", ""], vec![true, true, false]), + ), + ( + "f_col", + StringType::from_data_with_validity(vec!["v6", "", "v7"], vec![true, false, true]), + ), + ]; + run_ast( + file, + "map_keys(map([a_col, b_col, c_col], [d_col, e_col, f_col]))", + &columns, + ); +} + +fn test_map_values(file: &mut impl Write) { + run_ast(file, "map_values({})", &[]); + run_ast(file, "map_values({'a':1,'b':2,'c':3})", &[]); + run_ast(file, "map_values({1:'a',2:'b',3:'c'})", &[]); + run_ast(file, "map_values({'a':NULL,'b':2,'c':NULL})", &[]); + + let columns = [ + 
("a_col", StringType::from_data(vec!["a", "b", "c"])), + ("b_col", StringType::from_data(vec!["d", "e", "f"])), + ("c_col", StringType::from_data(vec!["x", "y", "z"])), + ( + "d_col", + StringType::from_data_with_validity(vec!["v1", "v2", "v3"], vec![true, true, true]), + ), + ( + "e_col", + StringType::from_data_with_validity(vec!["v4", "v5", ""], vec![true, true, false]), + ), + ( + "f_col", + StringType::from_data_with_validity(vec!["v6", "", "v7"], vec![true, false, true]), + ), + ]; + run_ast( + file, + "map_values(map([a_col, b_col, c_col], [d_col, e_col, f_col]))", + &columns, + ); +} diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 95a16530692d..213fa67cb876 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -2433,6 +2433,12 @@ Functions overloads: 1 map(Array(Nothing) NULL, Array(Nothing) NULL) :: Map(Nothing) NULL 2 map(Array(T0), Array(T1)) :: Map(T0, T1) 3 map(Array(T0) NULL, Array(T1) NULL) :: Map(T0, T1) NULL +0 map_keys(Map(Nothing)) :: Array(Nothing) +1 map_keys(Map(T0, T1)) :: Array(T0) +2 map_keys(Map(T0, T1) NULL) :: Array(T0) NULL +0 map_values(Map(Nothing)) :: Array(Nothing) +1 map_values(Map(T0, T1)) :: Array(T1) +2 map_values(Map(T0, T1) NULL) :: Array(T1) NULL 0 md5(String) :: String 1 md5(String NULL) :: String NULL 0 minus(Variant, Int32) :: Variant diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index 00e4f98532b4..dc53ff94d3c1 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -212,3 +212,129 @@ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------+ +ast : map_keys({}) +raw expr : map_keys(map(array(), 
array())) +checked expr : map_keys(map(array<>(), array<>())) +optimized expr : [] :: Array(Nothing) +output type : Array(Nothing) +output domain : [] +output : [] + + +ast : map_keys({'a':1,'b':2,'c':3}) +raw expr : map_keys(map(array('a', 'b', 'c'), array(1, 2, 3))) +checked expr : map_keys(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8))) +optimized expr : ['a', 'b', 'c'] +output type : Array(String) +output domain : [{"a"..="c"}] +output : ['a', 'b', 'c'] + + +ast : map_keys({1:'a',2:'b',3:'c'}) +raw expr : map_keys(map(array(1, 2, 3), array('a', 'b', 'c'))) +checked expr : map_keys(map(array(1_u8, 2_u8, 3_u8), array("a", "b", "c"))) +optimized expr : [1, 2, 3] +output type : Array(UInt8) +output domain : [{1..=3}] +output : [1, 2, 3] + + +ast : map_keys({'a':NULL,'b':2,'c':NULL}) +raw expr : map_keys(map(array('a', 'b', 'c'), array(NULL, 2, NULL))) +checked expr : map_keys(map(array("a", "b", "c"), array(CAST(NULL AS UInt8 NULL), CAST(2_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)))) +optimized expr : ['a', 'b', 'c'] +output type : Array(String) +output domain : [{"a"..="c"}] +output : ['a', 'b', 'c'] + + +ast : map_keys(map([a_col, b_col, c_col], [d_col, e_col, f_col])) +raw expr : map_keys(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL))) +checked expr : map_keys(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col))) +evaluation: ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-----------------+ +| | a_col | b_col | c_col | d_col | e_col | f_col | Output | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-----------------+ +| Type | String | String | String | String NULL | String NULL | String NULL | Array(String) | +| Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown | +| Row 0 
| 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | ['a', 'd', 'x'] | +| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | ['b', 'e', 'y'] | +| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | ['c', 'f', 'z'] | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-----------------+ +evaluation (internal): ++--------+-------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | +| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | +| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | +| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: StringColumn { data: 0x61647862657963667a, offsets: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] }, offsets: [0, 3, 6, 9] } | ++--------+-------------------------------------------------------------------------------------------------------------------------------------+ + + +ast : map_values({}) +raw expr : map_values(map(array(), array())) +checked expr : map_values(map(array<>(), array<>())) +optimized expr : [] :: Array(Nothing) +output type : Array(Nothing) +output domain : [] +output : [] + + +ast : map_values({'a':1,'b':2,'c':3}) +raw expr : map_values(map(array('a', 'b', 'c'), array(1, 2, 3))) +checked expr : map_values(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8))) +optimized expr : [1, 2, 3] +output type : Array(UInt8) 
+output domain : [{1..=3}] +output : [1, 2, 3] + + +ast : map_values({1:'a',2:'b',3:'c'}) +raw expr : map_values(map(array(1, 2, 3), array('a', 'b', 'c'))) +checked expr : map_values(map(array(1_u8, 2_u8, 3_u8), array("a", "b", "c"))) +optimized expr : ['a', 'b', 'c'] +output type : Array(String) +output domain : [{"a"..="c"}] +output : ['a', 'b', 'c'] + + +ast : map_values({'a':NULL,'b':2,'c':NULL}) +raw expr : map_values(map(array('a', 'b', 'c'), array(NULL, 2, NULL))) +checked expr : map_values(map(array("a", "b", "c"), array(CAST(NULL AS UInt8 NULL), CAST(2_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)))) +optimized expr : [NULL, 2, NULL] +output type : Array(UInt8 NULL) +output domain : [{0..=2} ∪ {NULL}] +output : [NULL, 2, NULL] + + +ast : map_values(map([a_col, b_col, c_col], [d_col, e_col, f_col])) +raw expr : map_values(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL))) +checked expr : map_values(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col))) +evaluation: ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------+ +| | a_col | b_col | c_col | d_col | e_col | f_col | Output | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------+ +| Type | String | String | String | String NULL | String NULL | String NULL | Array(String NULL) | +| Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown | +| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | ['v1', 'v4', 'v6'] | +| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | ['v2', 'v5', NULL] | +| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | ['v3', NULL, 'v7'] | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------+ +evaluation 
(internal): ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | +| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | +| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | +| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: NullableColumn { column: StringColumn { data: 0x7631763476367632763576337637, offsets: [0, 2, 4, 6, 8, 10, 10, 12, 12, 14] }, validity: [0b01011111, 0b_______1] }, offsets: [0, 3, 6, 9] } | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + diff --git a/tests/sqllogictests/suites/query/functions/02_0072_prql.test b/tests/sqllogictests/suites/query/functions/02_0073_prql.test similarity index 100% rename from tests/sqllogictests/suites/query/functions/02_0072_prql.test rename to tests/sqllogictests/suites/query/functions/02_0073_prql.test diff --git a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test new file mode 100644 index 
000000000000..0f8be27eadf7 --- /dev/null +++ b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test @@ -0,0 +1,41 @@ +statement ok +DROP DATABASE IF EXISTS map_func_test + +statement ok +CREATE DATABASE IF NOT EXISTS map_func_test + +statement ok +USE map_func_test + +query TT +select map_keys({}), map_values({}) +---- +[] [] + +query TT +select map_keys({'k1':1,'k2':2,'k3':null}), map_values({'k1':1,'k2':2,'k3':null}) +---- +['k1','k2','k3'] [1,2,NULL] + +statement ok +create table t(col1 Map(String, String Null) Not Null, col2 Map(String, Int Null) Null) + +statement ok +insert into t values({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}), ({}, null) + +query TT +select map_keys(col1), map_keys(col2) from t +---- +['k1','k2','k3'] ['a','b'] +['k5','k6'] ['d','e','f'] +[] NULL + +query TT +select map_values(col1), map_values(col2) from t +---- +['v1','v2',NULL] [10,20] +['v5','v6'] [40,NULL,50] +[] NULL + +statement ok +DROP DATABASE map_func_test