From 989fb495db4eafe513dbc46359a94d7d87fb7d33 Mon Sep 17 00:00:00 2001 From: Andy Lok Date: Sun, 7 Jan 2024 09:45:52 +0800 Subject: [PATCH] feat: cast between string and binary (#14247) * feat: cast between string and binary * fmt * fix * fix * improve --- Cargo.lock | 1 + src/query/expression/src/type_check.rs | 1 + src/query/expression/src/values.rs | 3 + src/query/formats/Cargo.toml | 1 + src/query/formats/src/field_encoder/csv.rs | 5 + src/query/formats/src/output_format/json.rs | 2 +- src/query/functions/src/scalars/binary.rs | 67 +++ src/query/functions/src/scalars/mod.rs | 2 + src/query/functions/tests/it/scalars/cast.rs | 84 +++- .../functions/tests/it/scalars/parser.rs | 2 +- .../tests/it/scalars/testdata/cast.txt | 414 ++++++++++++++++++ .../it/scalars/testdata/function_list.txt | 12 +- .../interpreter_table_modify_column.rs | 93 ++++ .../mysql/writers/query_result_writer.rs | 1 + .../sql/src/planner/semantic/type_check.rs | 2 +- .../base/05_ddl/05_0003_ddl_alter_table.test | 66 +++ 16 files changed, 749 insertions(+), 7 deletions(-) create mode 100644 src/query/functions/src/scalars/binary.rs diff --git a/Cargo.lock b/Cargo.lock index bcf296515786..2ab4f171a18e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2699,6 +2699,7 @@ dependencies = [ "databend-common-settings", "databend-storages-common-blocks", "databend-storages-common-table-meta", + "hex", "jsonb 0.3.0 (git+https://github.com/datafuselabs/jsonb?rev=582c139)", "lexical-core", "match-template", diff --git a/src/query/expression/src/type_check.rs b/src/query/expression/src/type_check.rs index 67b1726ef711..17bd0afb5dca 100755 --- a/src/query/expression/src/type_check.rs +++ b/src/query/expression/src/type_check.rs @@ -706,6 +706,7 @@ pub fn get_simple_cast_function(is_try: bool, dest_type: &DataType) -> Option n1.partial_cmp(n2), (Scalar::Decimal(d1), Scalar::Decimal(d2)) => d1.partial_cmp(d2), (Scalar::Boolean(b1), Scalar::Boolean(b2)) => b1.partial_cmp(b2), + (Scalar::Binary(s1), Scalar::Binary(s2)) => s1.partial_cmp(s2), (Scalar::String(s1), Scalar::String(s2)) => s1.partial_cmp(s2), (Scalar::Timestamp(t1), Scalar::Timestamp(t2)) => t1.partial_cmp(t2), (Scalar::Date(d1), Scalar::Date(d2)) => d1.partial_cmp(d2), @@ -628,6 +629,7 @@ impl PartialOrd for ScalarRef<'_> { (ScalarRef::Number(n1), ScalarRef::Number(n2)) => n1.partial_cmp(n2), (ScalarRef::Decimal(d1), ScalarRef::Decimal(d2)) => d1.partial_cmp(d2), (ScalarRef::Boolean(b1), ScalarRef::Boolean(b2)) => b1.partial_cmp(b2), + (ScalarRef::Binary(s1), ScalarRef::Binary(s2)) => s1.partial_cmp(s2), (ScalarRef::String(s1), ScalarRef::String(s2)) => s1.partial_cmp(s2), (ScalarRef::Timestamp(t1), ScalarRef::Timestamp(t2)) => t1.partial_cmp(t2), (ScalarRef::Date(d1), ScalarRef::Date(d2)) => d1.partial_cmp(d2), @@ -708,6 +710,7 @@ impl PartialOrd for Column { (Column::Number(col1), Column::Number(col2)) => col1.partial_cmp(col2), (Column::Decimal(col1), Column::Decimal(col2)) => col1.partial_cmp(col2), (Column::Boolean(col1), Column::Boolean(col2)) => col1.iter().partial_cmp(col2.iter()), + (Column::Binary(col1), Column::Binary(col2)) => col1.iter().partial_cmp(col2.iter()), (Column::String(col1), Column::String(col2)) => col1.iter().partial_cmp(col2.iter()), (Column::Timestamp(col1), Column::Timestamp(col2)) => { col1.iter().partial_cmp(col2.iter()) diff --git a/src/query/formats/Cargo.toml b/src/query/formats/Cargo.toml index 1d8870784aab..17b144f6a1eb 100644 --- a/src/query/formats/Cargo.toml +++ b/src/query/formats/Cargo.toml @@ -15,6 +15,7 @@ aho-corasick = { version = "1.0.1" } async-trait = { workspace = true } bstr = "1.0.1" chrono-tz = { workspace = true } +hex = "0.4.3" lexical-core = "0.8.5" match-template = { workspace = true } micromarshal = "0.4.0" diff --git a/src/query/formats/src/field_encoder/csv.rs b/src/query/formats/src/field_encoder/csv.rs index 6f13d6531732..9655f3bac2b4 100644 --- a/src/query/formats/src/field_encoder/csv.rs +++ b/src/query/formats/src/field_encoder/csv.rs @@ -117,6 +117,11 @@ impl FieldEncoderCSV { pub(crate) fn write_field(&self, column: &Column, row_index: usize, out_buf: &mut Vec) { match &column { Column::Nullable(box c) => self.write_nullable(c, row_index, out_buf), + + Column::Binary(c) => { + let buf = unsafe { c.index_unchecked(row_index) }; + self.string_formatter.write_string(buf, out_buf); + } Column::String(c) => { let buf = unsafe { c.index_unchecked(row_index) }; self.string_formatter.write_string(buf, out_buf); diff --git a/src/query/formats/src/output_format/json.rs b/src/query/formats/src/output_format/json.rs index 0b82767ab7a9..cfac8a10a6e2 100644 --- a/src/query/formats/src/output_format/json.rs +++ b/src/query/formats/src/output_format/json.rs @@ -98,7 +98,7 @@ fn scalar_to_json(s: ScalarRef<'_>, format: &FormatSettings) -> JsonValue { } ScalarRef::EmptyArray => JsonValue::Array(vec![]), ScalarRef::EmptyMap => JsonValue::Object(JsonMap::new()), - ScalarRef::Binary(x) => JsonValue::String(String::from_utf8_lossy(x).to_string()), + ScalarRef::Binary(x) => JsonValue::String(hex::encode(x)), ScalarRef::String(x) => JsonValue::String(String::from_utf8_lossy(x).to_string()), ScalarRef::Array(x) => { let vals = x diff --git a/src/query/functions/src/scalars/binary.rs b/src/query/functions/src/scalars/binary.rs new file mode 100644 index 000000000000..d84154e11e47 --- /dev/null +++ b/src/query/functions/src/scalars/binary.rs @@ -0,0 +1,67 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use databend_common_arrow::arrow::bitmap::Bitmap; +use databend_common_expression::error_to_null; +use databend_common_expression::types::nullable::NullableColumn; +use databend_common_expression::types::BinaryType; +use databend_common_expression::types::StringType; +use databend_common_expression::vectorize_with_builder_1_arg; +use databend_common_expression::EvalContext; +use databend_common_expression::FunctionDomain; +use databend_common_expression::FunctionRegistry; +use databend_common_expression::Value; +use databend_common_expression::ValueRef; + +pub fn register(registry: &mut FunctionRegistry) { + registry.register_passthrough_nullable_1_arg::( + "to_string", + |_, _| FunctionDomain::Full, + eval_binary_to_string, + ); + + registry.register_combine_nullable_1_arg::( + "try_to_string", + |_, _| FunctionDomain::Full, + error_to_null(eval_binary_to_string), + ); + + registry.register_passthrough_nullable_1_arg::( + "to_binary", + |_, _| FunctionDomain::Full, + |val, _| match val { + ValueRef::Scalar(val) => Value::Scalar(val.to_vec()), + ValueRef::Column(col) => Value::Column(col), + }, + ); + + registry.register_combine_nullable_1_arg::( + "try_to_binary", + |_, _| FunctionDomain::Full, + |val, _| match val { + ValueRef::Scalar(val) => Value::Scalar(Some(val.to_vec())), + ValueRef::Column(col) => Value::Column(NullableColumn { + validity: Bitmap::new_constant(true, col.len()), + column: col, + }), + }, + ); +} + +fn eval_binary_to_string(val: ValueRef, ctx: &mut EvalContext) -> Value { + vectorize_with_builder_1_arg::(|val, output, _| { + output.put_slice(val); + output.commit_row(); + })(val, ctx) +} diff --git a/src/query/functions/src/scalars/mod.rs b/src/query/functions/src/scalars/mod.rs index 464ee93a5aa0..2c5a799df179 100644 --- a/src/query/functions/src/scalars/mod.rs +++ b/src/query/functions/src/scalars/mod.rs @@ -17,6 +17,7 @@ use databend_common_expression::FunctionRegistry; mod arithmetic; mod arithmetic_modulo; mod array; +mod binary; mod bitmap; mod boolean; mod comparison; @@ -50,6 +51,7 @@ pub fn register(registry: &mut FunctionRegistry) { datetime::register(registry); math::register(registry); map::register(registry); + binary::register(registry); string::register(registry); string_multi_args::register(registry); tuple::register(registry); diff --git a/src/query/functions/tests/it/scalars/cast.rs b/src/query/functions/tests/it/scalars/cast.rs index 9ed9a6a79a5a..2e123c3ed7dc 100644 --- a/src/query/functions/tests/it/scalars/cast.rs +++ b/src/query/functions/tests/it/scalars/cast.rs @@ -39,8 +39,9 @@ fn test_cast() { test_cast_between_number_and_boolean(file, is_try); test_cast_between_date_and_timestamp(file, is_try); test_cast_between_string_and_timestamp(file, is_try); - test_between_string_and_date(file, is_try); + test_cast_between_string_and_date(file, is_try); test_cast_to_nested_type(file, is_try); + test_cast_between_binary_and_string(file, is_try); } } @@ -527,7 +528,7 @@ fn test_cast_between_string_and_timestamp(file: &mut impl Write, is_try: bool) { )]); } -fn test_between_string_and_date(file: &mut impl Write, is_try: bool) { +fn test_cast_between_string_and_date(file: &mut impl Write, is_try: bool) { let prefix = if is_try { "TRY_" } else { "" }; run_ast(file, format!("{prefix}TO_DATE('2022')"), &[]); @@ -672,6 +673,85 @@ fn test_cast_between_string_and_decimal(file: &mut impl Write, is_try: bool) { ); } +fn test_cast_between_binary_and_string(file: &mut impl Write, is_try: bool) { + let prefix = if is_try { "TRY_" } else { "" }; + + run_ast(file, format!("{prefix}CAST('Abc' AS BINARY)"), &[]); + run_ast(file, format!("{prefix}CAST('Dobrý den' AS BINARY)"), &[]); + run_ast(file, format!("{prefix}CAST('ß😀山' AS BINARY)"), &[]); + run_ast(file, format!("{prefix}CAST(NULL AS BINARY)"), &[]); + run_ast(file, format!("{prefix}CAST(NULL AS BINARY NULL)"), &[]); + run_ast(file, format!("{prefix}CAST(a AS BINARY)"), &[( + "a", + StringType::from_data(vec!["Abc", "Dobrý den", "ß😀山"]), + )]); + run_ast(file, format!("{prefix}CAST(a AS BINARY)"), &[( + "a", + StringType::from_data_with_validity(vec!["Abc", "Dobrý den", "ß😀山"], vec![ + true, true, false, + ]), + )]); + run_ast(file, format!("{prefix}CAST(a AS BINARY NULL)"), &[( + "a", + StringType::from_data_with_validity(vec!["Abc", "Dobrý den", "ß😀山"], vec![ + true, true, false, + ]), + )]); + run_ast( + file, + format!("{prefix}CAST({prefix}CAST('Abc' AS BINARY) AS STRING)"), + &[], + ); + run_ast( + file, + format!("{prefix}CAST({prefix}CAST('Dobrý den' AS BINARY) AS STRING)"), + &[], + ); + run_ast( + file, + format!("{prefix}CAST({prefix}CAST('ß😀山' AS BINARY) AS STRING)"), + &[], + ); + run_ast( + file, + format!("{prefix}CAST({prefix}CAST(NULL AS BINARY) AS STRING)"), + &[], + ); + run_ast( + file, + format!("{prefix}CAST({prefix}CAST(NULL AS BINARY NULL) AS STRING NULL)"), + &[], + ); + run_ast( + file, + format!("{prefix}CAST({prefix}CAST(a AS BINARY) AS STRING)"), + &[( + "a", + StringType::from_data(vec!["Abc", "Dobrý den", "ß😀山"]), + )], + ); + run_ast( + file, + format!("{prefix}CAST({prefix}CAST(a AS BINARY) AS STRING)"), + &[( + "a", + StringType::from_data_with_validity(vec!["Abc", "Dobrý den", "ß😀山"], vec![ + true, true, false, + ]), + )], + ); + run_ast( + file, + format!("{prefix}CAST({prefix}CAST(a AS BINARY NULL) AS STRING NULL)"), + &[( + "a", + StringType::from_data_with_validity(vec!["Abc", "Dobrý den", "ß😀山"], vec![ + true, true, false, + ]), + )], + ); +} + fn gen_bitmap_data() -> Column { // construct bitmap column with 4 row: // 0..5, 1..6, 2..7, 3..8 diff --git a/src/query/functions/tests/it/scalars/parser.rs b/src/query/functions/tests/it/scalars/parser.rs index 18aa64c5e0b8..661669d7df48 100644 --- a/src/query/functions/tests/it/scalars/parser.rs +++ b/src/query/functions/tests/it/scalars/parser.rs @@ -532,7 +532,7 @@ fn transform_data_type(target_type: databend_common_ast::ast::TypeName) -> DataT databend_common_ast::ast::TypeName::Decimal { precision, scale } => { DataType::Decimal(DecimalDataType::from_size(DecimalSize { precision, scale }).unwrap()) } - databend_common_ast::ast::TypeName::Binary => DataType::String, + databend_common_ast::ast::TypeName::Binary => DataType::Binary, databend_common_ast::ast::TypeName::String => DataType::String, databend_common_ast::ast::TypeName::Timestamp => DataType::Timestamp, databend_common_ast::ast::TypeName::Date => DataType::Date, diff --git a/src/query/functions/tests/it/scalars/testdata/cast.txt b/src/query/functions/tests/it/scalars/testdata/cast.txt index fd231e1ca32a..e6abc888b0eb 100644 --- a/src/query/functions/tests/it/scalars/testdata/cast.txt +++ b/src/query/functions/tests/it/scalars/testdata/cast.txt @@ -1846,6 +1846,198 @@ error: +ast : CAST('Abc' AS BINARY) +raw expr : CAST('Abc' AS Binary) +checked expr : to_binary("Abc") +optimized expr : 0x416263 +output type : Binary +output domain : Undefined +output : 0x416263 + + +ast : CAST('Dobrý den' AS BINARY) +raw expr : CAST('Dobrý den' AS Binary) +checked expr : to_binary("Dobrý den") +optimized expr : 0x446f6272c3bd2064656e +output type : Binary +output domain : Undefined +output : 0x446f6272c3bd2064656e + + +ast : CAST('ß😀山' AS BINARY) +raw expr : CAST('ß😀山' AS Binary) +checked expr : to_binary("ß😀山") +optimized expr : 0xc39ff09f9880e5b1b1 +output type : Binary +output domain : Undefined +output : 0xc39ff09f9880e5b1b1 + + +error: + --> SQL:1:1 + | +1 | CAST(NULL AS BINARY) + | ^^^^^^^^^^^^^^^^^^^^ unable to cast type `NULL` to type `Binary` + + + +ast : CAST(NULL AS BINARY NULL) +raw expr : CAST(NULL AS Binary NULL) +checked expr : CAST(NULL AS Binary NULL) +optimized expr : NULL +output type : Binary NULL +output domain : {NULL} +output : NULL + + +ast : CAST(a AS BINARY) +raw expr : CAST(a::String AS Binary) +checked expr : to_binary(a) +evaluation: ++--------+-------------------+------------------------+ +| | a | Output | ++--------+-------------------+------------------------+ +| Type | String | Binary | +| Domain | {"Abc"..="ß😀山"} | Undefined | +| Row 0 | 'Abc' | 0x416263 | +| Row 1 | 'Dobrý den' | 0x446f6272c3bd2064656e | +| Row 2 | 'ß😀山' | 0xc39ff09f9880e5b1b1 | ++--------+-------------------+------------------------+ +evaluation (internal): ++--------+------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | +| Output | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | ++--------+------------------------------------------------------------------------------------------------+ + + +error: + --> SQL:1:1 + | +1 | CAST(a AS BINARY) + | ^^^^^^^^^^^^^^^^^ unable to cast `NULL` to type `Binary` + + + +ast : CAST(a AS BINARY NULL) +raw expr : CAST(a::String NULL AS Binary NULL) +checked expr : CAST(a AS Binary NULL) +evaluation: ++--------+----------------------------+------------------------+ +| | a | Output | ++--------+----------------------------+------------------------+ +| Type | String NULL | Binary NULL | +| Domain | {"Abc"..="ß😀山"} ∪ {NULL} | Undefined ∪ {NULL} | +| Row 0 | 'Abc' | 0x416263 | +| Row 1 | 'Dobrý den' | 0x446f6272c3bd2064656e | +| Row 2 | NULL | NULL | ++--------+----------------------------+------------------------+ +evaluation (internal): ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | +| Output | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + + +ast : CAST(CAST('Abc' AS BINARY) AS STRING) +raw expr : CAST(CAST('Abc' AS Binary) AS String) +checked expr : to_string(to_binary("Abc")) +optimized expr : "Abc" +output type : String +output domain : {"Abc"..="Abc"} +output : 'Abc' + + +ast : CAST(CAST('Dobrý den' AS BINARY) AS STRING) +raw expr : CAST(CAST('Dobrý den' AS Binary) AS String) +checked expr : to_string(to_binary("Dobrý den")) +optimized expr : "Dobrý den" +output type : String +output domain : {"Dobrý den"..="Dobrý den"} +output : 'Dobrý den' + + +ast : CAST(CAST('ß😀山' AS BINARY) AS STRING) +raw expr : CAST(CAST('ß😀山' AS Binary) AS String) +checked expr : to_string(to_binary("ß😀山")) +optimized expr : "ß😀山" +output type : String +output domain : {"ß😀山"..="ß😀山"} +output : 'ß😀山' + + +error: + --> SQL:1:6 + | +1 | CAST(CAST(NULL AS BINARY) AS STRING) + | ^^^^^^^^^^^^^^^^^^^^ unable to cast type `NULL` to type `Binary` + + + +ast : CAST(CAST(NULL AS BINARY NULL) AS STRING NULL) +raw expr : CAST(CAST(NULL AS Binary NULL) AS String NULL) +checked expr : CAST(CAST(NULL AS Binary NULL) AS String NULL) +optimized expr : NULL +output type : String NULL +output domain : {NULL} +output : NULL + + +ast : CAST(CAST(a AS BINARY) AS STRING) +raw expr : CAST(CAST(a::String AS Binary) AS String) +checked expr : to_string(to_binary(a)) +evaluation: ++--------+-------------------+-------------+ +| | a | Output | ++--------+-------------------+-------------+ +| Type | String | String | +| Domain | {"Abc"..="ß😀山"} | {""..} | +| Row 0 | 'Abc' | 'Abc' | +| Row 1 | 'Dobrý den' | 'Dobrý den' | +| Row 2 | 'ß😀山' | 'ß😀山' | ++--------+-------------------+-------------+ +evaluation (internal): ++--------+------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | +| Output | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | ++--------+------------------------------------------------------------------------------------------------+ + + +error: + --> SQL:1:6 + | +1 | CAST(CAST(a AS BINARY) AS STRING) + | ^^^^^^^^^^^^^^^^^ unable to cast `NULL` to type `Binary` + + + +ast : CAST(CAST(a AS BINARY NULL) AS STRING NULL) +raw expr : CAST(CAST(a::String NULL AS Binary NULL) AS String NULL) +checked expr : CAST(CAST(a AS Binary NULL) AS String NULL) +evaluation: ++--------+----------------------------+-----------------+ +| | a | Output | ++--------+----------------------------+-----------------+ +| Type | String NULL | String NULL | +| Domain | {"Abc"..="ß😀山"} ∪ {NULL} | {""..} ∪ {NULL} | +| Row 0 | 'Abc' | 'Abc' | +| Row 1 | 'Dobrý den' | 'Dobrý den' | +| Row 2 | NULL | NULL | ++--------+----------------------------+-----------------+ +evaluation (internal): ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | +| Output | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + + ast : TRY_CAST(0 AS UINT8) raw expr : TRY_CAST(0 AS UInt8) checked expr : TRY_CAST(0_u8 AS UInt8 NULL) @@ -3883,3 +4075,225 @@ output domain : [[{0..=0} ∪ {NULL}]] output : [[NULL], [NULL, NULL]] +ast : TRY_CAST('Abc' AS BINARY) +raw expr : TRY_CAST('Abc' AS Binary) +checked expr : try_to_binary("Abc") +optimized expr : 0x416263 +output type : Binary NULL +output domain : Undefined +output : 0x416263 + + +ast : TRY_CAST('Dobrý den' AS BINARY) +raw expr : TRY_CAST('Dobrý den' AS Binary) +checked expr : try_to_binary("Dobrý den") +optimized expr : 0x446f6272c3bd2064656e +output type : Binary NULL +output domain : Undefined +output : 0x446f6272c3bd2064656e + + +ast : TRY_CAST('ß😀山' AS BINARY) +raw expr : TRY_CAST('ß😀山' AS Binary) +checked expr : try_to_binary("ß😀山") +optimized expr : 0xc39ff09f9880e5b1b1 +output type : Binary NULL +output domain : Undefined +output : 0xc39ff09f9880e5b1b1 + + +ast : TRY_CAST(NULL AS BINARY) +raw expr : TRY_CAST(NULL AS Binary) +checked expr : try_to_binary(CAST(NULL AS String NULL)) +optimized expr : NULL +output type : Binary NULL +output domain : {NULL} +output : NULL + + +ast : TRY_CAST(NULL AS BINARY NULL) +raw expr : TRY_CAST(NULL AS Binary NULL) +checked expr : TRY_CAST(NULL AS Binary NULL) +optimized expr : NULL +output type : Binary NULL +output domain : {NULL} +output : NULL + + +ast : TRY_CAST(a AS BINARY) +raw expr : TRY_CAST(a::String AS Binary) +checked expr : try_to_binary(a) +evaluation: ++--------+-------------------+------------------------+ +| | a | Output | ++--------+-------------------+------------------------+ +| Type | String | Binary NULL | +| Domain | {"Abc"..="ß😀山"} | Undefined ∪ {NULL} | +| Row 0 | 'Abc' | 0x416263 | +| Row 1 | 'Dobrý den' | 0x446f6272c3bd2064656e | +| Row 2 | 'ß😀山' | 0xc39ff09f9880e5b1b1 | ++--------+-------------------+------------------------+ +evaluation (internal): ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | +| Output | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____111] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + + +ast : TRY_CAST(a AS BINARY) +raw expr : TRY_CAST(a::String NULL AS Binary) +checked expr : try_to_binary(a) +evaluation: ++--------+----------------------------+------------------------+ +| | a | Output | ++--------+----------------------------+------------------------+ +| Type | String NULL | Binary NULL | +| Domain | {"Abc"..="ß😀山"} ∪ {NULL} | Undefined ∪ {NULL} | +| Row 0 | 'Abc' | 0x416263 | +| Row 1 | 'Dobrý den' | 0x446f6272c3bd2064656e | +| Row 2 | NULL | NULL | ++--------+----------------------------+------------------------+ +evaluation (internal): ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | +| Output | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + + +ast : TRY_CAST(a AS BINARY NULL) +raw expr : TRY_CAST(a::String NULL AS Binary NULL) +checked expr : TRY_CAST(a AS Binary NULL) +evaluation: ++--------+----------------------------+------------------------+ +| | a | Output | ++--------+----------------------------+------------------------+ +| Type | String NULL | Binary NULL | +| Domain | {"Abc"..="ß😀山"} ∪ {NULL} | Undefined ∪ {NULL} | +| Row 0 | 'Abc' | 0x416263 | +| Row 1 | 'Dobrý den' | 0x446f6272c3bd2064656e | +| Row 2 | NULL | NULL | ++--------+----------------------------+------------------------+ +evaluation (internal): ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | +| Output | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + + +ast : TRY_CAST(TRY_CAST('Abc' AS BINARY) AS STRING) +raw expr : TRY_CAST(TRY_CAST('Abc' AS Binary) AS String) +checked expr : try_to_string(try_to_binary("Abc")) +optimized expr : "Abc" +output type : String NULL +output domain : {"Abc"..="Abc"} +output : 'Abc' + + +ast : TRY_CAST(TRY_CAST('Dobrý den' AS BINARY) AS STRING) +raw expr : TRY_CAST(TRY_CAST('Dobrý den' AS Binary) AS String) +checked expr : try_to_string(try_to_binary("Dobrý den")) +optimized expr : "Dobrý den" +output type : String NULL +output domain : {"Dobrý den"..="Dobrý den"} +output : 'Dobrý den' + + +ast : TRY_CAST(TRY_CAST('ß😀山' AS BINARY) AS STRING) +raw expr : TRY_CAST(TRY_CAST('ß😀山' AS Binary) AS String) +checked expr : try_to_string(try_to_binary("ß😀山")) +optimized expr : "ß😀山" +output type : String NULL +output domain : {"ß😀山"..="ß😀山"} +output : 'ß😀山' + + +ast : TRY_CAST(TRY_CAST(NULL AS BINARY) AS STRING) +raw expr : TRY_CAST(TRY_CAST(NULL AS Binary) AS String) +checked expr : try_to_string(try_to_binary(CAST(NULL AS String NULL))) +optimized expr : NULL +output type : String NULL +output domain : {NULL} +output : NULL + + +ast : TRY_CAST(TRY_CAST(NULL AS BINARY NULL) AS STRING NULL) +raw expr : TRY_CAST(TRY_CAST(NULL AS Binary NULL) AS String NULL) +checked expr : TRY_CAST(TRY_CAST(NULL AS Binary NULL) AS String NULL) +optimized expr : NULL +output type : String NULL +output domain : {NULL} +output : NULL + + +ast : TRY_CAST(TRY_CAST(a AS BINARY) AS STRING) +raw expr : TRY_CAST(TRY_CAST(a::String AS Binary) AS String) +checked expr : try_to_string(try_to_binary(a)) +evaluation: ++--------+-------------------+-----------------+ +| | a | Output | ++--------+-------------------+-----------------+ +| Type | String | String NULL | +| Domain | {"Abc"..="ß😀山"} | {""..} ∪ {NULL} | +| Row 0 | 'Abc' | 'Abc' | +| Row 1 | 'Dobrý den' | 'Dobrý den' | +| Row 2 | 'ß😀山' | 'ß😀山' | ++--------+-------------------+-----------------+ +evaluation (internal): ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | +| Output | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____111] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + + +ast : TRY_CAST(TRY_CAST(a AS BINARY) AS STRING) +raw expr : TRY_CAST(TRY_CAST(a::String NULL AS Binary) AS String) +checked expr : try_to_string(try_to_binary(a)) +evaluation: ++--------+----------------------------+-----------------+ +| | a | Output | ++--------+----------------------------+-----------------+ +| Type | String NULL | String NULL | +| Domain | {"Abc"..="ß😀山"} ∪ {NULL} | {""..} ∪ {NULL} | +| Row 0 | 'Abc' | 'Abc' | +| Row 1 | 'Dobrý den' | 'Dobrý den' | +| Row 2 | NULL | NULL | ++--------+----------------------------+-----------------+ +evaluation (internal): ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | +| Output | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + + +ast : TRY_CAST(TRY_CAST(a AS BINARY NULL) AS STRING NULL) +raw expr : TRY_CAST(TRY_CAST(a::String NULL AS Binary NULL) AS String NULL) +checked expr : TRY_CAST(TRY_CAST(a AS Binary NULL) AS String NULL) +evaluation: ++--------+----------------------------+-----------------+ +| | a | Output | ++--------+----------------------------+-----------------+ +| Type | String NULL | String NULL | +| Domain | {"Abc"..="ß😀山"} ∪ {NULL} | {""..} ∪ {NULL} | +| Row 0 | 'Abc' | 'Abc' | +| Row 1 | 'Dobrý den' | 'Dobrý den' | +| Row 2 | NULL | NULL | ++--------+----------------------------+-----------------+ +evaluation (internal): ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | +| Output | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + + diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 2b54c6ac0e69..1d1b3c485d88 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -3192,6 +3192,8 @@ Functions overloads: 1 time_slot(Timestamp NULL) :: Timestamp NULL 0 to_base64(String) :: String 1 to_base64(String NULL) :: String NULL +0 to_binary(String) :: Binary +1 to_binary(String NULL) :: Binary NULL 0 to_bitmap(String) :: Bitmap 1 to_bitmap(String NULL) :: Bitmap NULL 2 to_bitmap(UInt64) :: Bitmap @@ -3493,8 +3495,10 @@ Functions overloads: 28 to_string(Date NULL) :: String NULL 29 to_string(Timestamp) :: String 30 to_string(Timestamp NULL) :: String NULL -31 to_string(Bitmap) :: String -32 to_string(Bitmap NULL) :: String NULL +31 to_string(Binary) :: String +32 to_string(Binary NULL) :: String NULL +33 to_string(Bitmap) :: String +34 to_string(Bitmap NULL) :: String NULL 0 to_timestamp(Variant) :: Timestamp 1 to_timestamp(Variant NULL) :: Timestamp NULL 2 to_timestamp(String) :: Timestamp @@ -3702,6 +3706,8 @@ Functions overloads: 1 try_parse_json(Variant NULL) :: Variant NULL 2 try_parse_json(String) :: Variant NULL 3 try_parse_json(String NULL) :: Variant NULL +0 try_to_binary(String) :: Binary NULL +1 try_to_binary(String NULL) :: Binary NULL 0 try_to_boolean(Variant) :: Boolean NULL 1 try_to_boolean(Variant NULL) :: Boolean NULL 2 try_to_boolean(String) :: Boolean NULL @@ -3924,6 +3930,8 @@ Functions overloads: 25 try_to_string(Date NULL) :: String NULL 26 try_to_string(Timestamp) :: String NULL 27 try_to_string(Timestamp NULL) :: String NULL +28 try_to_string(Binary) :: String NULL +29 try_to_string(Binary NULL) :: String NULL 0 try_to_timestamp(Variant) :: Timestamp NULL 1 try_to_timestamp(Variant NULL) :: Timestamp NULL 2 try_to_timestamp(String) :: Timestamp NULL diff --git a/src/query/service/src/interpreters/interpreter_table_modify_column.rs b/src/query/service/src/interpreters/interpreter_table_modify_column.rs index d7651715cdef..e9ee907e213b 100644 --- a/src/query/service/src/interpreters/interpreter_table_modify_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_modify_column.rs @@ -21,6 +21,7 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::ComputedExpr; use databend_common_expression::DataSchema; +use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; use databend_common_license::license::Feature::ComputedColumn; @@ -271,11 +272,103 @@ impl ModifyTableColumnInterpreter { ))); } } + // check if schema has changed if schema == new_schema { return Ok(PipelineBuildResult::create()); } + // if alter column from string to binary, we don't need to rebuild table + let is_alter_column_string_to_binary = + schema + .fields() + .iter() + .zip(new_schema.fields()) + .all(|(old_field, new_field)| { + fn is_string_to_binary(old_ty: &TableDataType, new_ty: &TableDataType) -> bool { + match (old_ty, new_ty) { + (TableDataType::String, TableDataType::Binary) => true, + (TableDataType::Nullable(old_ty), TableDataType::Nullable(new_ty)) => { + is_string_to_binary(old_ty, new_ty) + } + (TableDataType::Map(old_ty), TableDataType::Map(new_ty)) => { + is_string_to_binary(old_ty, new_ty) + } + (TableDataType::Array(old_ty), TableDataType::Array(new_ty)) => { + is_string_to_binary(old_ty, new_ty) + } + ( + TableDataType::Tuple { + fields_type: old_tys, + .. + }, + TableDataType::Tuple { + fields_type: new_tys, + .. + }, + ) => { + old_tys.len() == new_tys.len() + && old_tys + .iter() + .zip(new_tys) + .all(|(old_ty, new_ty)| is_string_to_binary(old_ty, new_ty)) + } + _ => false, + } + } + + let TableField { + name: old_name, + default_expr: old_default_expr, + data_type: old_data_type, + column_id: old_column_id, + computed_expr: old_computed_expr, + } = old_field; + let TableField { + name: new_name, + default_expr: new_default_expr, + data_type: new_data_type, + column_id: new_column_id, + computed_expr: new_computed_expr, + } = new_field; + old_name == new_name + && old_default_expr == new_default_expr + && old_column_id == new_column_id + && old_computed_expr == new_computed_expr + && (old_data_type == new_data_type + || is_string_to_binary(&old_field.data_type, &new_field.data_type)) + }); + if is_alter_column_string_to_binary { + table_info.meta.schema = new_schema.into(); + + let table_id = table_info.ident.table_id; + let table_version = table_info.ident.seq; + + let req = UpdateTableMetaReq { + table_id, + seq: MatchSeq::Exact(table_version), + new_table_meta: table_info.meta, + copied_files: None, + deduplicated_label: None, + update_stream_meta: vec![], + }; + + let res = catalog + .update_table_meta(table.get_table_info(), req) + .await?; + + if let Some(share_table_info) = res.share_table_info { + save_share_table_info( + &self.ctx.get_tenant(), + self.ctx.get_data_operator()?.operator(), + share_table_info, + ) + .await?; + } + + return Ok(PipelineBuildResult::create()); + } + // 1. construct sql for selecting data from old table let mut sql = "select".to_string(); schema diff --git a/src/query/service/src/servers/mysql/writers/query_result_writer.rs b/src/query/service/src/servers/mysql/writers/query_result_writer.rs index 7e76e89fa7f7..d002df967685 100644 --- a/src/query/service/src/servers/mysql/writers/query_result_writer.rs +++ b/src/query/service/src/servers/mysql/writers/query_result_writer.rs @@ -155,6 +155,7 @@ impl<'a, W: AsyncWrite + Send + Unpin> DFQueryResultWriter<'a, W> { DataType::EmptyArray => Ok(ColumnType::MYSQL_TYPE_VARCHAR), DataType::EmptyMap => Ok(ColumnType::MYSQL_TYPE_VARCHAR), DataType::Boolean => Ok(ColumnType::MYSQL_TYPE_SHORT), + DataType::Binary => Ok(ColumnType::MYSQL_TYPE_BLOB), DataType::String => Ok(ColumnType::MYSQL_TYPE_VARCHAR), DataType::Number(num_ty) => match num_ty { NumberDataType::Int8 => Ok(ColumnType::MYSQL_TYPE_TINY), diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 4f56800b6b2b..3d2332c36427 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -3818,7 +3818,7 @@ pub fn resolve_type_name_inner(type_name: &TypeName) -> Result { scale: *scale, })?) } - TypeName::Binary => TableDataType::String, + TypeName::Binary => TableDataType::Binary, TypeName::String => TableDataType::String, TypeName::Timestamp => TableDataType::Timestamp, TypeName::Date => TableDataType::Date, diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0003_ddl_alter_table.test b/tests/sqllogictests/suites/base/05_ddl/05_0003_ddl_alter_table.test index d276cd295ddf..a5ff2eb23057 100644 --- a/tests/sqllogictests/suites/base/05_ddl/05_0003_ddl_alter_table.test +++ b/tests/sqllogictests/suites/base/05_ddl/05_0003_ddl_alter_table.test @@ -114,3 +114,69 @@ ALTER TABLE `05_0003_at_t3` MODIFY COLUMN c float not null statement ok DROP TABLE IF EXISTS `05_0003_at_t3` + +statement ok +set hide_options_in_show_create_table=1 + +statement ok +CREATE TABLE `05_0003_at_t4`(a string not null, b string null, c array(string) null, d tuple(string, string) null) ENGINE=FUSE COMPRESSION='zstd' STORAGE_FORMAT='native' + +statement ok +INSERT INTO TABLE `05_0003_at_t4` values('a', 'b', ['c1', 'c2'], ('d1', 'd2')) + +query TT +SHOW CREATE TABLE `05_0003_at_t4` +---- +05_0003_at_t4 CREATE TABLE `05_0003_at_t4` ( `a` VARCHAR NOT NULL, `b` VARCHAR NULL, `c` ARRAY(STRING) NULL, `d` TUPLE(1 STRING, 2 STRING) NULL ) ENGINE=FUSE + +query TTTT +SELECT * FROM `05_0003_at_t4` +---- +a b ['c1','c2'] ('d1','d2') + +statement ok +ALTER TABLE `05_0003_at_t4` MODIFY COLUMN a binary not null + +statement ok +ALTER TABLE `05_0003_at_t4` MODIFY COLUMN b binary null + +statement ok +ALTER TABLE `05_0003_at_t4` MODIFY COLUMN c array(binary) null + +statement ok +ALTER TABLE `05_0003_at_t4` MODIFY COLUMN d tuple(binary, binary) null + +query TT +SHOW CREATE TABLE `05_0003_at_t4` +---- +05_0003_at_t4 CREATE TABLE `05_0003_at_t4` ( `a` BINARY NOT NULL, `b` BINARY NULL, `c` ARRAY(BINARY) NULL, `d` TUPLE(1 BINARY, 2 BINARY) NULL ) ENGINE=FUSE + +query +SELECT * FROM `05_0003_at_t4` +---- +a b ['c1','c2'] ('d1','d2') + +statement ok +ALTER TABLE `05_0003_at_t4` MODIFY COLUMN a string not null + +statement ok +ALTER TABLE `05_0003_at_t4` MODIFY COLUMN b string null + +statement ok +ALTER TABLE `05_0003_at_t4` MODIFY COLUMN c array(string) null + +statement ok +ALTER TABLE `05_0003_at_t4` MODIFY COLUMN d tuple(string, string) null + +query TT +SHOW CREATE TABLE `05_0003_at_t4` +---- +05_0003_at_t4 CREATE TABLE `05_0003_at_t4` ( `a` VARCHAR NOT NULL, `b` VARCHAR NULL, `c` ARRAY(STRING) NULL, `d` TUPLE(1 STRING, 2 STRING) NULL ) ENGINE=FUSE + +query TTTT +SELECT * FROM `05_0003_at_t4` +---- +a b ['c1','c2'] ('d1','d2') + +statement ok +DROP TABLE IF EXISTS `05_0003_at_t4`