Skip to content

Commit

Permalink
feat(function): Implement map_cat function (#15348)
Browse files Browse the repository at this point in the history
* Implement map_cat function

Signed-off-by: shamb0 <[email protected]>

* Implement map_cat function

Signed-off-by: shamb0 <[email protected]>

* Implement map_cat function

Signed-off-by: shamb0 <[email protected]>

* feat: implement map functions using MapType

* merge to upstream main updates

* Refactor map.rs function module based on code review feedback

Signed-off-by: shamb0 <[email protected]>

* Refactor map.rs function module based on code review feedback

Signed-off-by: shamb0 <[email protected]>

* Refactor map.rs function module based on code review feedback

Signed-off-by: shamb0 <[email protected]>

* Refactor map.rs function module based on code review feedback

Signed-off-by: shamb0 <[email protected]>

* Refactor map.rs function module based on code review feedback

Signed-off-by: shamb0 <[email protected]>

* Refactor map.rs function module based on code review feedback

Signed-off-by: shamb0 <[email protected]>

* Refactor map.rs function module based on code review feedback

Signed-off-by: shamb0 <[email protected]>

* Make CI happy

Signed-off-by: shamb0 <[email protected]>

* Refactor map.rs function module based on code review feedback

Signed-off-by: shamb0 <[email protected]>

* Refactor map.rs function module based on code review feedback

Signed-off-by: shamb0 <[email protected]>

* Refactor map.rs function module based on code review feedback

Signed-off-by: shamb0 <[email protected]>

* Refactor map.rs function module based on code review feedback

Signed-off-by: shamb0 <[email protected]>

---------

Signed-off-by: shamb0 <[email protected]>
  • Loading branch information
shamb0 authored May 9, 2024
1 parent 43863f4 commit f68c97e
Show file tree
Hide file tree
Showing 5 changed files with 348 additions and 0 deletions.
57 changes: 57 additions & 0 deletions src/query/functions/src/scalars/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashSet;
use std::hash::Hash;

use databend_common_expression::types::nullable::NullableDomain;
use databend_common_expression::types::ArgType;
use databend_common_expression::types::ArrayType;
use databend_common_expression::types::EmptyArrayType;
use databend_common_expression::types::EmptyMapType;
Expand Down Expand Up @@ -159,6 +161,61 @@ pub fn register(registry: &mut FunctionRegistry) {
),
);

// `map_cat` overload for two empty maps: the domain is reported as `Full`
// and the evaluation closure simply returns `()`.
registry.register_2_arg::<EmptyMapType, EmptyMapType, EmptyMapType, _, _>(
"map_cat",
|_, _, _| FunctionDomain::Full,
|_, _, _| (),
);

// `map_cat` overload for two `Map(T0, T1)` operands: concatenates both maps
// into one; when a key occurs on both sides the right-hand value is used.
registry.register_passthrough_nullable_2_arg(
"map_cat",
// Domain calculation: when both inputs carry a (key, value) domain pair,
// keys are merged with keys and values with values; when only one side has
// a domain it is reused as-is; when neither has one the result has none.
|_, domain1, domain2| {
FunctionDomain::Domain(match (domain1, domain2) {
(Some((key_domain1, val_domain1)), Some((key_domain2, val_domain2))) => Some((
key_domain1.merge(key_domain2),
val_domain1.merge(val_domain2),
)),
(Some(domain1), None) => Some(domain1).cloned(),
(None, Some(domain2)) => Some(domain2).cloned(),
(None, None) => None,
})
},
vectorize_with_builder_2_arg::<
MapType<GenericType<0>, GenericType<1>>,
MapType<GenericType<0>, GenericType<1>>,
MapType<GenericType<0>, GenericType<1>>,
>(|lhs, rhs, output_map, ctx| {
// If the context carries a validity bitmap and the bit at index
// `output_map.len()` is unset, push a default entry and skip the merge.
if let Some(validity) = &ctx.validity {
if !validity.get_bit(output_map.len()) {
output_map.push_default();
return;
}
}

// Scratch builder for the merged entries, sized for the case where the
// two maps share no key.
let mut concatenated_map_builder =
ArrayType::create_builder(lhs.len() + rhs.len(), ctx.generics);
// Keys found on both sides; consulted in the second loop so that they are
// not written a second time.
let mut detect_dup_keys = HashSet::new();

// Walk the left map in order. A key that also exists on the right keeps
// its left-hand position but takes the right-hand value. Note that the
// lookup is a linear scan of `rhs` for every `lhs` entry.
for (lhs_key, lhs_value) in lhs.iter() {
if let Some((_, rhs_value)) = rhs.iter().find(|(rhs_key, _)| lhs_key == *rhs_key) {
detect_dup_keys.insert(lhs_key.clone());
concatenated_map_builder.put_item((lhs_key.clone(), rhs_value.clone()));
} else {
concatenated_map_builder.put_item((lhs_key.clone(), lhs_value.clone()));
}
}

// Append the right-hand entries whose keys were not already written above.
for (rhs_key, rhs_value) in rhs.iter() {
if !detect_dup_keys.contains(&rhs_key) {
concatenated_map_builder.put_item((rhs_key, rhs_value));
}
}

// Close the merged row, then append the built column to the output.
concatenated_map_builder.commit_row();
output_map.append_column(&concatenated_map_builder.build());
}),
);

registry.register_1_arg_core::<EmptyMapType, NumberType<u8>, _, _>(
"map_size",
|_, _| FunctionDomain::Domain(SimpleDomain { min: 0, max: 0 }),
Expand Down
66 changes: 66 additions & 0 deletions src/query/functions/tests/it/scalars/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,72 @@ fn test_map() {
test_map_keys(file);
test_map_values(file);
test_map_size(file);
test_map_cat(file);
}

/// Feeds a fixed sequence of `map_cat` expressions to `run_ast`, which
/// receives `file` together with each expression and its input columns.
///
/// The sequence covers empty operands, column-backed operands with and
/// without colliding keys, operands of different sizes, NULL values and
/// map-valued entries. The call order is significant and must stay stable.
fn test_map_cat(file: &mut impl Write) {
    // Both column tables below are evaluated with this same expression.
    let column_expr = "map_cat(map([a_col, b_col], [d_col, e_col]), map([c_col], [f_col]))";

    // At least one operand is the empty map.
    for expr in [
        "map_cat({}, {})",
        "map_cat({}, {'k1': 'v1'})",
        "map_cat({'k1': 'v1'}, {})",
    ] {
        run_ast(file, expr, &[]);
    }

    // Column-backed operands whose keys never collide within a row.
    let disjoint_key_columns = [
        ("a_col", StringType::from_data(vec!["a_k1", "a_k2", "a_k3"])),
        ("b_col", StringType::from_data(vec!["b_k1", "b_k2", "b_k3"])),
        ("c_col", StringType::from_data(vec!["c_k1", "c_k2", "c_k3"])),
        ("d_col", StringType::from_data(vec!["aaa1", "aaa2", "aaa3"])),
        ("e_col", StringType::from_data(vec!["bbb1", "bbb2", "bbb3"])),
        ("f_col", StringType::from_data(vec!["ccc1", "ccc2", "ccc3"])),
    ];
    run_ast(file, column_expr, &disjoint_key_columns);

    // Literal operands that share the key 'k1'.
    run_ast(file, "map_cat({'k1':'v1','k2':'v2'}, {'k1':'abc'})", &[]);

    // Column-backed operands where the second and third rows contain a
    // left-hand key equal to the right-hand key (`c_k2` and `c_k3`).
    let overlapping_key_columns = [
        ("a_col", StringType::from_data(vec!["a_k1", "a_k2", "c_k3"])),
        ("b_col", StringType::from_data(vec!["b_k1", "c_k2", "b_k3"])),
        ("c_col", StringType::from_data(vec!["c_k1", "c_k2", "c_k3"])),
        ("d_col", StringType::from_data(vec!["aaa1", "aaa2", "aaa3"])),
        ("e_col", StringType::from_data(vec!["bbb1", "bbb2", "bbb3"])),
        ("f_col", StringType::from_data(vec!["ccc1", "ccc2", "ccc3"])),
    ];
    run_ast(file, column_expr, &overlapping_key_columns);

    // Remaining literal-only cases, in order: operands of different sizes
    // (two entries), NULL values (one entry), and map-valued entries with
    // overlapping outer keys (two entries).
    for expr in [
        "map_cat({'k1': 'v1', 'k2': 'v2'}, {'k3': 'v3'})",
        "map_cat({'k1': 'v1'}, {'k2': 'v2', 'k3': 'v3'})",
        "map_cat({'k1': 'v1', 'k2': NULL}, {'k2': 'v2', 'k3': NULL})",
        "map_cat({'k1': {'nk1': 'nv1'}, 'k2': {'nk2': 'nv2'}}, {'k2': {'nk3': 'nv3'}, 'k3': {'nk4': 'nv4'}})",
        "map_cat({'k1': {'nk1': 'nv1'}, 'k2': {'nk2': 'nv2'}}, {'k1': {'nk1': 'new_nv1'}, 'k2': {'nk3': 'nv3'}})",
    ] {
        run_ast(file, expr, &[]);
    }
}

fn test_create(file: &mut impl Write) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2436,6 +2436,10 @@ Functions overloads:
1 map(Array(Nothing) NULL, Array(Nothing) NULL) :: Map(Nothing) NULL
2 map(Array(T0), Array(T1)) :: Map(T0, T1)
3 map(Array(T0) NULL, Array(T1) NULL) :: Map(T0, T1) NULL
0 map_cat(Map(Nothing), Map(Nothing)) :: Map(Nothing)
1 map_cat(Map(Nothing) NULL, Map(Nothing) NULL) :: Map(Nothing) NULL
2 map_cat(Map(T0, T1), Map(T0, T1)) :: Map(T0, T1)
3 map_cat(Map(T0, T1) NULL, Map(T0, T1) NULL) :: Map(T0, T1) NULL
0 map_keys(Map(Nothing)) :: Array(Nothing)
1 map_keys(Map(T0, T1)) :: Array(T0)
2 map_keys(Map(T0, T1) NULL) :: Array(T0) NULL
Expand Down
Loading

0 comments on commit f68c97e

Please sign in to comment.