Skip to content

Commit

Permalink
feat(query): to_binary function support variant, bitmap, geometry, …
Browse files Browse the repository at this point in the history
…geography types (#17026)

* feat(query): `to_binary` function support variant, bitmap, geometry, geography types

* add tests

* remove some dict tests
  • Loading branch information
b41sh authored Dec 13, 2024
1 parent 3a9f404 commit 85f4771
Show file tree
Hide file tree
Showing 9 changed files with 353 additions and 30 deletions.
8 changes: 0 additions & 8 deletions src/common/column/src/binary/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,3 @@ impl Debug for BinaryColumn {
.finish()
}
}

// impl Debug for BinaryColumn {
// fn fmt(&self, f: &mut Formatter) -> Result {
// let writer = |f: &mut Formatter, index| write_value(self, index, f);
// write!(f, "BinaryColumn")?;
// write_vec(f, writer, None, self.len(), "None", false)
// }
// }
88 changes: 88 additions & 0 deletions src/query/functions/src/scalars/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,15 @@ use databend_common_expression::types::string::StringColumnBuilder;
use databend_common_expression::types::AnyType;
use databend_common_expression::types::BinaryType;
use databend_common_expression::types::Bitmap;
use databend_common_expression::types::BitmapType;
use databend_common_expression::types::DataType;
use databend_common_expression::types::GeographyType;
use databend_common_expression::types::GeometryType;
use databend_common_expression::types::NumberDataType;
use databend_common_expression::types::NumberType;
use databend_common_expression::types::StringType;
use databend_common_expression::types::UInt8Type;
use databend_common_expression::types::VariantType;
use databend_common_expression::vectorize_1_arg;
use databend_common_expression::Column;
use databend_common_expression::EvalContext;
Expand Down Expand Up @@ -63,6 +67,90 @@ pub fn register(registry: &mut FunctionRegistry) {
error_to_null(eval_binary_to_string),
);

registry.register_passthrough_nullable_1_arg::<VariantType, BinaryType, _, _>(
"to_binary",
|_, _| FunctionDomain::Full,
|val, _| match val {
Value::Scalar(val) => Value::Scalar(val.to_vec()),
Value::Column(col) => Value::Column(col),
},
);

registry.register_combine_nullable_1_arg::<VariantType, BinaryType, _, _>(
"try_to_binary",
|_, _| FunctionDomain::Full,
|val, _| match val {
Value::Scalar(val) => Value::Scalar(Some(val.to_vec())),
Value::Column(col) => {
let validity = Bitmap::new_constant(true, col.len());
Value::Column(NullableColumn::new(col, validity))
}
},
);

registry.register_passthrough_nullable_1_arg::<BitmapType, BinaryType, _, _>(
"to_binary",
|_, _| FunctionDomain::Full,
|val, _| match val {
Value::Scalar(val) => Value::Scalar(val.to_vec()),
Value::Column(col) => Value::Column(col),
},
);

registry.register_combine_nullable_1_arg::<BitmapType, BinaryType, _, _>(
"try_to_binary",
|_, _| FunctionDomain::Full,
|val, _| match val {
Value::Scalar(val) => Value::Scalar(Some(val.to_vec())),
Value::Column(col) => {
let validity = Bitmap::new_constant(true, col.len());
Value::Column(NullableColumn::new(col, validity))
}
},
);

registry.register_passthrough_nullable_1_arg::<GeometryType, BinaryType, _, _>(
"to_binary",
|_, _| FunctionDomain::Full,
|val, _| match val {
Value::Scalar(val) => Value::Scalar(val.to_vec()),
Value::Column(col) => Value::Column(col),
},
);

registry.register_combine_nullable_1_arg::<GeometryType, BinaryType, _, _>(
"try_to_binary",
|_, _| FunctionDomain::Full,
|val, _| match val {
Value::Scalar(val) => Value::Scalar(Some(val.to_vec())),
Value::Column(col) => {
let validity = Bitmap::new_constant(true, col.len());
Value::Column(NullableColumn::new(col, validity))
}
},
);

registry.register_passthrough_nullable_1_arg::<GeographyType, BinaryType, _, _>(
"to_binary",
|_, _| FunctionDomain::Full,
|val, _| match val {
Value::Scalar(val) => Value::Scalar(val.0.to_vec()),
Value::Column(col) => Value::Column(col.0),
},
);

registry.register_combine_nullable_1_arg::<GeographyType, BinaryType, _, _>(
"try_to_binary",
|_, _| FunctionDomain::Full,
|val, _| match val {
Value::Scalar(val) => Value::Scalar(Some(val.0.to_vec())),
Value::Column(col) => {
let validity = Bitmap::new_constant(true, col.len());
Value::Column(NullableColumn::new(col.0, validity))
}
},
);

registry.register_passthrough_nullable_1_arg::<StringType, BinaryType, _, _>(
"to_binary",
|_, _| FunctionDomain::Full,
Expand Down
40 changes: 40 additions & 0 deletions src/query/functions/tests/it/scalars/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ fn test_binary() {
for is_try in [false, true] {
test_from_base64(file, is_try);
test_from_hex(file, is_try);
test_to_binary(file, is_try);
}
}

Expand Down Expand Up @@ -91,3 +92,42 @@ fn test_from_base64(file: &mut impl Write, is_try: bool) {
)]);
run_ast(file, format!("{prefix}from_base64('!@#')"), &[]);
}

fn test_to_binary(file: &mut impl Write, is_try: bool) {
let prefix = if is_try { "TRY_" } else { "" };

run_ast(
file,
format!("{prefix}to_binary(parse_json('{{\"k1\":\"val\",\"k2\":100}}'))"),
&[],
);
run_ast(file, format!("{prefix}to_binary(parse_json('10'))"), &[]);
run_ast(file, format!("{prefix}to_binary(parse_json('123456'))"), &[
]);
run_ast(
file,
format!("{prefix}to_binary(parse_json('\"abcd\"'))"),
&[],
);
run_ast(file, format!("{prefix}to_binary(to_bitmap('1,2,3'))"), &[]);
run_ast(
file,
format!("{prefix}to_binary(to_bitmap('100,25,50,700'))"),
&[],
);
run_ast(
file,
format!("{prefix}to_binary(st_geometryfromwkt('SRID=4326;POINT(1.0 2.0)'))"),
&[],
);
run_ast(
file,
format!("{prefix}to_binary(st_geometryfromwkb(unhex('0101000020797f000066666666a9cb17411f85ebc19e325641')))"),
&[],
);
run_ast(
file,
format!("{prefix}to_binary(st_geographyfromewkt('SRID=4326;POINT(-122.35 37.55)'))"),
&[],
);
}
164 changes: 163 additions & 1 deletion src/query/functions/tests/it/scalars/testdata/binary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ output : 5

ast : length(to_binary(NULL))
raw expr : length(to_binary(NULL))
checked expr : length<Binary NULL>(to_binary<String NULL>(CAST(NULL AS String NULL)))
checked expr : length<Binary NULL>(to_binary<Variant NULL>(CAST(NULL AS Variant NULL)))
optimized expr : NULL
output type : UInt64 NULL
output domain : {NULL}
Expand Down Expand Up @@ -203,6 +203,87 @@ evaluation (internal):
+--------+------------------------------------------------+


ast : to_binary(parse_json('{"k1":"val","k2":100}'))
raw expr : to_binary(parse_json('{"k1":"val","k2":100}'))
checked expr : to_binary<Variant>(parse_json<String>("{\"k1\":\"val\",\"k2\":100}"))
optimized expr : 40000002100000021000000210000003200000026B316B3276616C5064
output type : Binary
output domain : Undefined
output : 40000002100000021000000210000003200000026B316B3276616C5064


ast : to_binary(parse_json('10'))
raw expr : to_binary(parse_json('10'))
checked expr : to_binary<Variant>(parse_json<String>("10"))
optimized expr : 2000000020000002500A
output type : Binary
output domain : Undefined
output : 2000000020000002500A


ast : to_binary(parse_json('123456'))
raw expr : to_binary(parse_json('123456'))
checked expr : to_binary<Variant>(parse_json<String>("123456"))
optimized expr : 2000000020000005500001E240
output type : Binary
output domain : Undefined
output : 2000000020000005500001E240


ast : to_binary(parse_json('"abcd"'))
raw expr : to_binary(parse_json('"abcd"'))
checked expr : to_binary<Variant>(parse_json<String>("\"abcd\""))
optimized expr : 200000001000000461626364
output type : Binary
output domain : Undefined
output : 200000001000000461626364


ast : to_binary(to_bitmap('1,2,3'))
raw expr : to_binary(to_bitmap('1,2,3'))
checked expr : to_binary<Bitmap>(to_bitmap<String>("1,2,3"))
optimized expr : 0100000000000000000000003A300000010000000000020010000000010002000300
output type : Binary
output domain : Undefined
output : 0100000000000000000000003A300000010000000000020010000000010002000300


ast : to_binary(to_bitmap('100,25,50,700'))
raw expr : to_binary(to_bitmap('100,25,50,700'))
checked expr : to_binary<Bitmap>(to_bitmap<String>("100,25,50,700"))
optimized expr : 0100000000000000000000003A300000010000000000030010000000190032006400BC02
output type : Binary
output domain : Undefined
output : 0100000000000000000000003A300000010000000000030010000000190032006400BC02


ast : to_binary(st_geometryfromwkt('SRID=4326;POINT(1.0 2.0)'))
raw expr : to_binary(st_geometryfromwkt('SRID=4326;POINT(1.0 2.0)'))
checked expr : to_binary<Geometry>(st_geometryfromwkt<String>("SRID=4326;POINT(1.0 2.0)"))
optimized expr : 0101000020E6100000000000000000F03F0000000000000040
output type : Binary
output domain : Undefined
output : 0101000020E6100000000000000000F03F0000000000000040


ast : to_binary(st_geometryfromwkb(unhex('0101000020797f000066666666a9cb17411f85ebc19e325641')))
raw expr : to_binary(st_geometryfromwkb(unhex('0101000020797f000066666666a9cb17411f85ebc19e325641')))
checked expr : to_binary<Geometry>(st_geometryfromwkb<Binary>(from_hex<String>("0101000020797f000066666666a9cb17411f85ebc19e325641")))
optimized expr : 0101000020797F000066666666A9CB17411F85EBC19E325641
output type : Binary
output domain : Undefined
output : 0101000020797F000066666666A9CB17411F85EBC19E325641


ast : to_binary(st_geographyfromewkt('SRID=4326;POINT(-122.35 37.55)'))
raw expr : to_binary(st_geographyfromewkt('SRID=4326;POINT(-122.35 37.55)'))
checked expr : to_binary<Geography>(st_geographyfromewkt<String>("SRID=4326;POINT(-122.35 37.55)"))
optimized expr : 0101000020E61000006666666666965EC06666666666C64240
output type : Binary
output domain : Undefined
output : 0101000020E61000006666666666965EC06666666666C64240


ast : TRY_from_base64('QWJj')::String
raw expr : CAST(TRY_from_base64('QWJj') AS String)
checked expr : CAST(try_from_base64<String>("QWJj") AS String)
Expand Down Expand Up @@ -291,3 +372,84 @@ evaluation (internal):
+--------+------------------------------------------------+


ast : TRY_to_binary(parse_json('{"k1":"val","k2":100}'))
raw expr : TRY_to_binary(parse_json('{"k1":"val","k2":100}'))
checked expr : try_to_binary<Variant>(parse_json<String>("{\"k1\":\"val\",\"k2\":100}"))
optimized expr : 40000002100000021000000210000003200000026B316B3276616C5064
output type : Binary NULL
output domain : Undefined
output : 40000002100000021000000210000003200000026B316B3276616C5064


ast : TRY_to_binary(parse_json('10'))
raw expr : TRY_to_binary(parse_json('10'))
checked expr : try_to_binary<Variant>(parse_json<String>("10"))
optimized expr : 2000000020000002500A
output type : Binary NULL
output domain : Undefined
output : 2000000020000002500A


ast : TRY_to_binary(parse_json('123456'))
raw expr : TRY_to_binary(parse_json('123456'))
checked expr : try_to_binary<Variant>(parse_json<String>("123456"))
optimized expr : 2000000020000005500001E240
output type : Binary NULL
output domain : Undefined
output : 2000000020000005500001E240


ast : TRY_to_binary(parse_json('"abcd"'))
raw expr : TRY_to_binary(parse_json('"abcd"'))
checked expr : try_to_binary<Variant>(parse_json<String>("\"abcd\""))
optimized expr : 200000001000000461626364
output type : Binary NULL
output domain : Undefined
output : 200000001000000461626364


ast : TRY_to_binary(to_bitmap('1,2,3'))
raw expr : TRY_to_binary(to_bitmap('1,2,3'))
checked expr : try_to_binary<Bitmap>(to_bitmap<String>("1,2,3"))
optimized expr : 0100000000000000000000003A300000010000000000020010000000010002000300
output type : Binary NULL
output domain : Undefined
output : 0100000000000000000000003A300000010000000000020010000000010002000300


ast : TRY_to_binary(to_bitmap('100,25,50,700'))
raw expr : TRY_to_binary(to_bitmap('100,25,50,700'))
checked expr : try_to_binary<Bitmap>(to_bitmap<String>("100,25,50,700"))
optimized expr : 0100000000000000000000003A300000010000000000030010000000190032006400BC02
output type : Binary NULL
output domain : Undefined
output : 0100000000000000000000003A300000010000000000030010000000190032006400BC02


ast : TRY_to_binary(st_geometryfromwkt('SRID=4326;POINT(1.0 2.0)'))
raw expr : TRY_to_binary(st_geometryfromwkt('SRID=4326;POINT(1.0 2.0)'))
checked expr : try_to_binary<Geometry>(st_geometryfromwkt<String>("SRID=4326;POINT(1.0 2.0)"))
optimized expr : 0101000020E6100000000000000000F03F0000000000000040
output type : Binary NULL
output domain : Undefined
output : 0101000020E6100000000000000000F03F0000000000000040


ast : TRY_to_binary(st_geometryfromwkb(unhex('0101000020797f000066666666a9cb17411f85ebc19e325641')))
raw expr : TRY_to_binary(st_geometryfromwkb(unhex('0101000020797f000066666666a9cb17411f85ebc19e325641')))
checked expr : try_to_binary<Geometry>(st_geometryfromwkb<Binary>(from_hex<String>("0101000020797f000066666666a9cb17411f85ebc19e325641")))
optimized expr : 0101000020797F000066666666A9CB17411F85EBC19E325641
output type : Binary NULL
output domain : Undefined
output : 0101000020797F000066666666A9CB17411F85EBC19E325641


ast : TRY_to_binary(st_geographyfromewkt('SRID=4326;POINT(-122.35 37.55)'))
raw expr : TRY_to_binary(st_geographyfromewkt('SRID=4326;POINT(-122.35 37.55)'))
checked expr : try_to_binary<Geography>(st_geographyfromewkt<String>("SRID=4326;POINT(-122.35 37.55)"))
optimized expr : 0101000020E61000006666666666965EC06666666666C64240
output type : Binary NULL
output domain : Undefined
output : 0101000020E61000006666666666965EC06666666666C64240


4 changes: 2 additions & 2 deletions src/query/functions/tests/it/scalars/testdata/cast.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4118,7 +4118,7 @@ output : C39FF09F9880E5B1B1

ast : TRY_CAST(NULL AS BINARY)
raw expr : TRY_CAST(NULL AS Binary)
checked expr : try_to_binary<String NULL>(CAST(NULL AS String NULL))
checked expr : try_to_binary<Variant NULL>(CAST(NULL AS Variant NULL))
optimized expr : NULL
output type : Binary NULL
output domain : {NULL}
Expand Down Expand Up @@ -4229,7 +4229,7 @@ output : 'ß😀山'

ast : TRY_CAST(TRY_CAST(NULL AS BINARY) AS STRING)
raw expr : TRY_CAST(TRY_CAST(NULL AS Binary) AS String)
checked expr : try_to_string<Binary NULL>(try_to_binary<String NULL>(CAST(NULL AS String NULL)))
checked expr : try_to_string<Binary NULL>(try_to_binary<Variant NULL>(CAST(NULL AS Variant NULL)))
optimized expr : NULL
output type : String NULL
output domain : {NULL}
Expand Down
Loading

0 comments on commit 85f4771

Please sign in to comment.