Skip to content

Commit

Permalink
feat: cast between string and binary (#14247)
Browse files Browse the repository at this point in the history
* feat: cast between string and binary

* fmt

* fix

* fix

* improve
  • Loading branch information
andylokandy authored Jan 7, 2024
1 parent 073c685 commit 989fb49
Show file tree
Hide file tree
Showing 16 changed files with 749 additions and 7 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/query/expression/src/type_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,7 @@ pub fn get_simple_cast_function(is_try: bool, dest_type: &DataType) -> Option<St
}

pub const ALL_SIMPLE_CAST_FUNCTIONS: &[&str] = &[
"to_binary",
"to_string",
"to_uint8",
"to_uint16",
Expand Down
3 changes: 3 additions & 0 deletions src/query/expression/src/values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,7 @@ impl PartialOrd for Scalar {
(Scalar::Number(n1), Scalar::Number(n2)) => n1.partial_cmp(n2),
(Scalar::Decimal(d1), Scalar::Decimal(d2)) => d1.partial_cmp(d2),
(Scalar::Boolean(b1), Scalar::Boolean(b2)) => b1.partial_cmp(b2),
(Scalar::Binary(s1), Scalar::Binary(s2)) => s1.partial_cmp(s2),
(Scalar::String(s1), Scalar::String(s2)) => s1.partial_cmp(s2),
(Scalar::Timestamp(t1), Scalar::Timestamp(t2)) => t1.partial_cmp(t2),
(Scalar::Date(d1), Scalar::Date(d2)) => d1.partial_cmp(d2),
Expand Down Expand Up @@ -628,6 +629,7 @@ impl PartialOrd for ScalarRef<'_> {
(ScalarRef::Number(n1), ScalarRef::Number(n2)) => n1.partial_cmp(n2),
(ScalarRef::Decimal(d1), ScalarRef::Decimal(d2)) => d1.partial_cmp(d2),
(ScalarRef::Boolean(b1), ScalarRef::Boolean(b2)) => b1.partial_cmp(b2),
(ScalarRef::Binary(s1), ScalarRef::Binary(s2)) => s1.partial_cmp(s2),
(ScalarRef::String(s1), ScalarRef::String(s2)) => s1.partial_cmp(s2),
(ScalarRef::Timestamp(t1), ScalarRef::Timestamp(t2)) => t1.partial_cmp(t2),
(ScalarRef::Date(d1), ScalarRef::Date(d2)) => d1.partial_cmp(d2),
Expand Down Expand Up @@ -708,6 +710,7 @@ impl PartialOrd for Column {
(Column::Number(col1), Column::Number(col2)) => col1.partial_cmp(col2),
(Column::Decimal(col1), Column::Decimal(col2)) => col1.partial_cmp(col2),
(Column::Boolean(col1), Column::Boolean(col2)) => col1.iter().partial_cmp(col2.iter()),
(Column::Binary(col1), Column::Binary(col2)) => col1.iter().partial_cmp(col2.iter()),
(Column::String(col1), Column::String(col2)) => col1.iter().partial_cmp(col2.iter()),
(Column::Timestamp(col1), Column::Timestamp(col2)) => {
col1.iter().partial_cmp(col2.iter())
Expand Down
1 change: 1 addition & 0 deletions src/query/formats/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ aho-corasick = { version = "1.0.1" }
async-trait = { workspace = true }
bstr = "1.0.1"
chrono-tz = { workspace = true }
hex = "0.4.3"
lexical-core = "0.8.5"
match-template = { workspace = true }
micromarshal = "0.4.0"
Expand Down
5 changes: 5 additions & 0 deletions src/query/formats/src/field_encoder/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ impl FieldEncoderCSV {
pub(crate) fn write_field(&self, column: &Column, row_index: usize, out_buf: &mut Vec<u8>) {
match &column {
Column::Nullable(box c) => self.write_nullable(c, row_index, out_buf),

Column::Binary(c) => {
let buf = unsafe { c.index_unchecked(row_index) };
self.string_formatter.write_string(buf, out_buf);
}
Column::String(c) => {
let buf = unsafe { c.index_unchecked(row_index) };
self.string_formatter.write_string(buf, out_buf);
Expand Down
2 changes: 1 addition & 1 deletion src/query/formats/src/output_format/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ fn scalar_to_json(s: ScalarRef<'_>, format: &FormatSettings) -> JsonValue {
}
ScalarRef::EmptyArray => JsonValue::Array(vec![]),
ScalarRef::EmptyMap => JsonValue::Object(JsonMap::new()),
ScalarRef::Binary(x) => JsonValue::String(String::from_utf8_lossy(x).to_string()),
ScalarRef::Binary(x) => JsonValue::String(hex::encode(x)),
ScalarRef::String(x) => JsonValue::String(String::from_utf8_lossy(x).to_string()),
ScalarRef::Array(x) => {
let vals = x
Expand Down
67 changes: 67 additions & 0 deletions src/query/functions/src/scalars/binary.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright 2021 Datafuse Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use databend_common_arrow::arrow::bitmap::Bitmap;
use databend_common_expression::error_to_null;
use databend_common_expression::types::nullable::NullableColumn;
use databend_common_expression::types::BinaryType;
use databend_common_expression::types::StringType;
use databend_common_expression::vectorize_with_builder_1_arg;
use databend_common_expression::EvalContext;
use databend_common_expression::FunctionDomain;
use databend_common_expression::FunctionRegistry;
use databend_common_expression::Value;
use databend_common_expression::ValueRef;

/// Registers the scalar cast functions between `Binary` and `String`.
///
/// Four functions are added to `registry`, in this order:
/// - `to_string` (`Binary` -> `String`), evaluated by [`eval_binary_to_string`].
/// - `try_to_string`, the same evaluator wrapped in `error_to_null`.
/// - `to_binary` (`String` -> `Binary`), which copies a scalar's bytes and
///   hands a column through untouched.
/// - `try_to_binary`, which does the same but wraps the result as nullable,
///   using a validity bitmap built with `Bitmap::new_constant(true, len)`.
///
/// Every function reports `FunctionDomain::Full` as its output domain.
pub fn register(registry: &mut FunctionRegistry) {
    // Binary -> String.
    registry.register_passthrough_nullable_1_arg::<BinaryType, StringType, _, _>(
        "to_string",
        |_, _| FunctionDomain::Full,
        eval_binary_to_string,
    );

    // Binary -> String, `try_` flavour: evaluation errors are mapped through `error_to_null`.
    registry.register_combine_nullable_1_arg::<BinaryType, StringType, _, _>(
        "try_to_string",
        |_, _| FunctionDomain::Full,
        error_to_null(eval_binary_to_string),
    );

    // String -> Binary: the column is reused as-is; a scalar is copied into an owned buffer.
    registry.register_passthrough_nullable_1_arg::<StringType, BinaryType, _, _>(
        "to_binary",
        |_, _| FunctionDomain::Full,
        |input, _| match input {
            ValueRef::Column(column) => Value::Column(column),
            ValueRef::Scalar(bytes) => Value::Scalar(bytes.to_vec()),
        },
    );

    // String -> Binary, `try_` flavour: same data, wrapped as a nullable value.
    registry.register_combine_nullable_1_arg::<StringType, BinaryType, _, _>(
        "try_to_binary",
        |_, _| FunctionDomain::Full,
        |input, _| match input {
            ValueRef::Column(column) => {
                // One validity bit per row, all initialised from the constant `true`.
                let validity = Bitmap::new_constant(true, column.len());
                Value::Column(NullableColumn { validity, column })
            }
            ValueRef::Scalar(bytes) => Value::Scalar(Some(bytes.to_vec())),
        },
    );
}

/// Evaluates the `Binary` -> `String` cast.
///
/// The per-row closure appends the input bytes to the output builder with
/// `put_slice` and then closes the row with `commit_row`; the closure itself
/// performs no inspection or validation of the bytes.
/// `vectorize_with_builder_1_arg` lifts that closure so it can be applied to
/// the whole `val` argument.
fn eval_binary_to_string(val: ValueRef<BinaryType>, ctx: &mut EvalContext) -> Value<StringType> {
    let copy_rows = vectorize_with_builder_1_arg::<BinaryType, StringType>(|bytes, builder, _| {
        builder.put_slice(bytes);
        builder.commit_row();
    });
    copy_rows(val, ctx)
}
2 changes: 2 additions & 0 deletions src/query/functions/src/scalars/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use databend_common_expression::FunctionRegistry;
mod arithmetic;
mod arithmetic_modulo;
mod array;
mod binary;
mod bitmap;
mod boolean;
mod comparison;
Expand Down Expand Up @@ -50,6 +51,7 @@ pub fn register(registry: &mut FunctionRegistry) {
datetime::register(registry);
math::register(registry);
map::register(registry);
binary::register(registry);
string::register(registry);
string_multi_args::register(registry);
tuple::register(registry);
Expand Down
84 changes: 82 additions & 2 deletions src/query/functions/tests/it/scalars/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ fn test_cast() {
test_cast_between_number_and_boolean(file, is_try);
test_cast_between_date_and_timestamp(file, is_try);
test_cast_between_string_and_timestamp(file, is_try);
test_between_string_and_date(file, is_try);
test_cast_between_string_and_date(file, is_try);
test_cast_to_nested_type(file, is_try);
test_cast_between_binary_and_string(file, is_try);
}
}

Expand Down Expand Up @@ -527,7 +528,7 @@ fn test_cast_between_string_and_timestamp(file: &mut impl Write, is_try: bool) {
)]);
}

fn test_between_string_and_date(file: &mut impl Write, is_try: bool) {
fn test_cast_between_string_and_date(file: &mut impl Write, is_try: bool) {
let prefix = if is_try { "TRY_" } else { "" };

run_ast(file, format!("{prefix}TO_DATE('2022')"), &[]);
Expand Down Expand Up @@ -672,6 +673,85 @@ fn test_cast_between_string_and_decimal(file: &mut impl Write, is_try: bool) {
);
}

/// Runs the STRING <-> BINARY cast expressions through `run_ast`.
///
/// When `is_try` is set, every `CAST` is spelled `TRY_CAST`. The first half
/// casts literals, `NULL`, and a column `a` to `BINARY`; the second half wraps
/// each of those in a cast back to `STRING`. The order of the `run_ast` calls
/// is kept fixed because each call writes to `file`.
fn test_cast_between_binary_and_string(file: &mut impl Write, is_try: bool) {
    let prefix = if is_try { "TRY_" } else { "" };

    // Sample texts: plain ASCII, Latin with diacritics, and multi-byte characters.
    const SAMPLES: [&str; 3] = ["Abc", "Dobrý den", "ß😀山"];

    // Fresh column builders, so each `run_ast` call gets its own column value.
    let plain_column = || StringType::from_data(SAMPLES.to_vec());
    let nullable_column =
        || StringType::from_data_with_validity(SAMPLES.to_vec(), vec![true, true, false]);

    // STRING -> BINARY
    for text in &SAMPLES {
        run_ast(file, format!("{prefix}CAST('{text}' AS BINARY)"), &[]);
    }
    run_ast(file, format!("{prefix}CAST(NULL AS BINARY)"), &[]);
    run_ast(file, format!("{prefix}CAST(NULL AS BINARY NULL)"), &[]);
    run_ast(file, format!("{prefix}CAST(a AS BINARY)"), &[(
        "a",
        plain_column(),
    )]);
    run_ast(file, format!("{prefix}CAST(a AS BINARY)"), &[(
        "a",
        nullable_column(),
    )]);
    run_ast(file, format!("{prefix}CAST(a AS BINARY NULL)"), &[(
        "a",
        nullable_column(),
    )]);

    // STRING -> BINARY -> STRING round trips
    for text in &SAMPLES {
        run_ast(
            file,
            format!("{prefix}CAST({prefix}CAST('{text}' AS BINARY) AS STRING)"),
            &[],
        );
    }
    run_ast(
        file,
        format!("{prefix}CAST({prefix}CAST(NULL AS BINARY) AS STRING)"),
        &[],
    );
    run_ast(
        file,
        format!("{prefix}CAST({prefix}CAST(NULL AS BINARY NULL) AS STRING NULL)"),
        &[],
    );
    run_ast(
        file,
        format!("{prefix}CAST({prefix}CAST(a AS BINARY) AS STRING)"),
        &[("a", plain_column())],
    );
    run_ast(
        file,
        format!("{prefix}CAST({prefix}CAST(a AS BINARY) AS STRING)"),
        &[("a", nullable_column())],
    );
    run_ast(
        file,
        format!("{prefix}CAST({prefix}CAST(a AS BINARY NULL) AS STRING NULL)"),
        &[("a", nullable_column())],
    );
}

fn gen_bitmap_data() -> Column {
// construct bitmap column with 4 row:
// 0..5, 1..6, 2..7, 3..8
Expand Down
2 changes: 1 addition & 1 deletion src/query/functions/tests/it/scalars/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@ fn transform_data_type(target_type: databend_common_ast::ast::TypeName) -> DataT
databend_common_ast::ast::TypeName::Decimal { precision, scale } => {
DataType::Decimal(DecimalDataType::from_size(DecimalSize { precision, scale }).unwrap())
}
databend_common_ast::ast::TypeName::Binary => DataType::String,
databend_common_ast::ast::TypeName::Binary => DataType::Binary,
databend_common_ast::ast::TypeName::String => DataType::String,
databend_common_ast::ast::TypeName::Timestamp => DataType::Timestamp,
databend_common_ast::ast::TypeName::Date => DataType::Date,
Expand Down
Loading

0 comments on commit 989fb49

Please sign in to comment.