From dcadede16557520c31d82635038406066f13dc0a Mon Sep 17 00:00:00 2001 From: coldWater Date: Mon, 23 Sep 2024 17:36:05 +0800 Subject: [PATCH 1/9] num_to_char Signed-off-by: coldWater --- Cargo.lock | 1 + src/common/io/Cargo.toml | 1 + src/common/io/src/lib.rs | 1 + src/common/io/src/number.rs | 414 ++++++++++++++++++++++++++++++++++++ 4 files changed, 417 insertions(+) create mode 100644 src/common/io/src/number.rs diff --git a/Cargo.lock b/Cargo.lock index fa69ca151cd2..184b9f084683 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3656,6 +3656,7 @@ dependencies = [ "chrono-tz 0.8.6", "databend-common-base", "databend-common-exception", + "enumflags2", "ethnum", "geo", "geos", diff --git a/src/common/io/Cargo.toml b/src/common/io/Cargo.toml index 9af4a6b20b91..88f98e6e3565 100644 --- a/src/common/io/Cargo.toml +++ b/src/common/io/Cargo.toml @@ -18,6 +18,7 @@ chrono = { workspace = true } chrono-tz = { workspace = true } databend-common-base = { workspace = true } databend-common-exception = { workspace = true } +enumflags2 = { workspace = true } ethnum = { workspace = true } geo = { workspace = true } geos = { workspace = true } diff --git a/src/common/io/src/lib.rs b/src/common/io/src/lib.rs index 7b337d75595f..5836169e42f8 100644 --- a/src/common/io/src/lib.rs +++ b/src/common/io/src/lib.rs @@ -26,6 +26,7 @@ pub mod constants; pub mod format_diagnostic; +pub mod number; pub mod prelude; mod binary_read; diff --git a/src/common/io/src/number.rs b/src/common/io/src/number.rs new file mode 100644 index 000000000000..65cecb94430d --- /dev/null +++ b/src/common/io/src/number.rs @@ -0,0 +1,414 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Template Patterns for Numeric Formatting + +use databend_common_exception::Result; +use enumflags2::bitflags; +use enumflags2::make_bitflags; +use enumflags2::BitFlags; + +// https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/formatting.c + +#[derive(Clone, Copy)] +enum NumPoz { + NumComma, + NumDec, + Num0, + Num9, + NumB, + NumC, + NumD, + NumE, + NumFM, + NumG, + NumL, + NumMI, + NumPL, + NumPR, + NumRN, + NumSG, + NumSP, + NumS, + NumTH, + NumV, + Numb, + Numc, + Numd, + Nume, + Numfm, + Numg, + Numl, + Nummi, + Numpl, + Numpr, + Numrn, + Numsg, + Numsp, + Nums, + Numth, + Numv, +} + +// ---------- +// Flags for NUMBER version +// ---------- + +#[bitflags] +#[repr(u16)] +#[derive(Clone, Copy, Debug)] +enum NumFlag { + Decimal, + LDecimal, + Zero, + Blank, + FillMode, + LSign, + Bracket, + Minus, + Plus, + Roman, + Multi, + PlusPost, + MinusPost, + EEEE, +} + +enum NumLSign { + Pre, + Post, +} + +#[derive(Clone)] +struct KeyWord { + name: &'static str, + id: NumPoz, + // is_digit: bool, + // date_mode: FromCharDateMode, +} + +impl KeyWord { + const fn new(name: &'static str, id: NumPoz) -> KeyWord { + KeyWord { name, id } + } +} + +const NUM_KEYWORDS: [KeyWord; 36] = [ + KeyWord::new(",", NumPoz::NumComma), + KeyWord::new(".", NumPoz::NumDec), + KeyWord::new("0", NumPoz::Num0), + KeyWord::new("9", NumPoz::Num9), + KeyWord::new("B", NumPoz::NumB), + KeyWord::new("C", NumPoz::NumC), + KeyWord::new("D", NumPoz::NumD), + KeyWord::new("EEEE", NumPoz::NumE), + KeyWord::new("FM", NumPoz::NumFM), + KeyWord::new("G", NumPoz::NumG), + KeyWord::new("L", NumPoz::NumL), + KeyWord::new("MI", NumPoz::NumMI), + KeyWord::new("PL", NumPoz::NumPL), + KeyWord::new("PR", NumPoz::NumPR), + KeyWord::new("RN", NumPoz::NumRN), + KeyWord::new("SG", NumPoz::NumSG), + KeyWord::new("SP", NumPoz::NumSP), + KeyWord::new("S", NumPoz::NumS), + KeyWord::new("TH", NumPoz::NumTH), + KeyWord::new("V", NumPoz::NumV), + KeyWord::new("b", NumPoz::NumB), + KeyWord::new("c", NumPoz::Numc), + KeyWord::new("d", NumPoz::NumD), + KeyWord::new("eeee", NumPoz::NumE), + KeyWord::new("fm", NumPoz::NumFM), + KeyWord::new("g", NumPoz::NumG), + KeyWord::new("l", NumPoz::NumL), + KeyWord::new("mi", NumPoz::NumMI), + KeyWord::new("pl", NumPoz::NumPL), + KeyWord::new("pr", NumPoz::NumPR), + KeyWord::new("rn", NumPoz::Numrn), + KeyWord::new("sg", NumPoz::NumSG), + KeyWord::new("sp", NumPoz::NumSP), + KeyWord::new("s", NumPoz::NumS), + KeyWord::new("th", NumPoz::Numth), + KeyWord::new("v", NumPoz::NumV), +]; + +struct FormatNode { + typ: NodeType, + character: Vec, // if type is CHAR + suffix: u8, // keyword prefix/suffix code, if any + key: KeyWord, // if type is ACTION +} + +enum NodeType { + End, + Action, + Char, + Separator, + Space, +} + +// ---------- +// Number description struct +// ---------- +#[derive(Default, Debug)] +struct NumDesc { + pre: usize, // (count) numbers before decimal + post: usize, // (count) numbers after decimal + lsign: Option, // want locales sign + flag: BitFlags, // number parameters + pre_lsign_num: usize, // tmp value for lsign + multi: usize, // multiplier for 'V' + zero_start: usize, // position of first zero + zero_end: usize, // position of last zero + need_locale: bool, // needs it locale +} + +impl NumDesc { + pub fn try_new(mut str: &str) -> Result { + let mut num = NumDesc::default(); + + while !str.is_empty() { + match NUM_KEYWORDS.iter().find(|k| str.starts_with(k.name)) { + Some(k) => { + let n = FormatNode { + typ: NodeType::Action, + character: Vec::new(), + suffix: 0, + key: k.clone(), + }; + + num.prepare(&n)?; + + str = &str[k.name.len()..] + } + None => todo!(), + } + } + Ok(num) + } + + fn prepare(&mut self, n: &FormatNode) -> std::result::Result<(), &'static str> { + if !matches!(n.typ, NodeType::Action) { + return Ok(()); + } + + if self.flag.contains(NumFlag::EEEE) && !matches!(n.key.id, NumPoz::NumE) { + return Err("\"EEEE\" must be the last pattern used"); + } + + match n.key.id { + NumPoz::Num9 => { + if self.flag.contains(NumFlag::Bracket) { + return Err("\"9\" must be ahead of \"PR\""); + } + + if self.flag.contains(NumFlag::Multi) { + self.multi += 1; + return Ok(()); + } + + if self.flag.contains(NumFlag::Decimal) { + self.post += 1; + } else { + self.pre += 1; + } + return Ok(()); + } + + NumPoz::Num0 => { + if self.flag.contains(NumFlag::Bracket) { + return Err("\"0\" must be ahead of \"PR\""); + } + + if !self.flag.intersects(NumFlag::Zero | NumFlag::Decimal) { + self.flag.insert(NumFlag::Zero); + self.zero_start = self.pre + 1; + } + + if !self.flag.contains(NumFlag::Decimal) { + self.pre += 1; + } else { + self.post += 1; + } + + self.zero_end = self.pre + self.post; + Ok(()) + } + + NumPoz::NumB => { + if self.pre == 0 && self.post == 0 && !self.flag.contains(NumFlag::Zero) { + self.flag.insert(NumFlag::Blank) + } + Ok(()) + } + + NumPoz::NumD => { + self.flag.insert(NumFlag::LDecimal); + self.need_locale = true; + + if self.flag.contains(NumFlag::Decimal) { + return Err("multiple decimal points"); + } + if self.flag.contains(NumFlag::Multi) { + return Err("cannot use \"V\" and decimal point together"); + } + + self.flag.insert(NumFlag::Decimal); + Ok(()) + } + + NumPoz::NumDec => { + if self.flag.contains(NumFlag::Decimal) { + return Err("multiple decimal points"); + } + if self.flag.contains(NumFlag::Multi) { + return Err("cannot use \"V\" and decimal point together"); + } + + self.flag.insert(NumFlag::Decimal); + Ok(()) + } + + NumPoz::NumFM => { + self.flag.insert(NumFlag::FillMode); + Ok(()) + } + + NumPoz::NumS => { + if self.flag.contains(NumFlag::LSign) { + return Err("cannot use \"S\" twice"); + } + if self + .flag + .intersects(NumFlag::Plus | NumFlag::Minus | NumFlag::Bracket) + { + return Err("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together"); + } + + if self.flag.contains(NumFlag::Decimal) { + self.lsign = Some(NumLSign::Pre); + self.pre_lsign_num = self.pre; + self.need_locale = true; + self.flag.insert(NumFlag::LSign); + return Ok(()); + } + + if self.lsign.is_none() { + self.lsign = Some(NumLSign::Post); + self.need_locale = true; + self.flag.insert(NumFlag::LSign); + } + Ok(()) + } + + NumPoz::NumMI => { + if self.flag.contains(NumFlag::LSign) { + return Err("cannot use \"S\" and \"MI\" together"); + } + + self.flag.insert(NumFlag::Minus); + if self.flag.contains(NumFlag::Decimal) { + self.flag.insert(NumFlag::MinusPost) + } + Ok(()) + } + NumPoz::NumPL => { + if self.flag.contains(NumFlag::LSign) { + return Err("cannot use \"S\" and \"PL\" together"); + } + + self.flag.insert(NumFlag::Plus); + if self.flag.contains(NumFlag::Decimal) { + self.flag.insert(NumFlag::PlusPost) + } + Ok(()) + } + NumPoz::NumSG => { + if self.flag.contains(NumFlag::LSign) { + return Err("cannot use \"S\" and \"SG\" together"); + } + self.flag.insert(NumFlag::Plus | NumFlag::Minus); + Ok(()) + } + NumPoz::NumPR => { + if self + .flag + .intersects(NumFlag::LSign | NumFlag::Plus | NumFlag::Minus) + { + return Err("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together"); + } + + self.flag.insert(NumFlag::Bracket); + Ok(()) + } + NumPoz::Numrn | NumPoz::NumRN => { + self.flag.insert(NumFlag::Roman); + Ok(()) + } + + NumPoz::NumL | NumPoz::NumG => { + self.need_locale = true; + Ok(()) + } + + NumPoz::NumV => { + if self.flag.contains(NumFlag::Decimal) { + return Err("cannot use \"V\" and decimal point together"); + } + self.flag.insert(NumFlag::Multi); + Ok(()) + } + + NumPoz::NumE => { + if self.flag.contains(NumFlag::EEEE) { + return Err("cannot use \"EEEE\" twice"); + } + + if self.flag.intersects( + NumFlag::Blank + | NumFlag::FillMode + | NumFlag::LSign + | NumFlag::Bracket + | NumFlag::Minus + | NumFlag::Plus + | NumFlag::Roman + | NumFlag::Multi, + ) { + return Err( + "\"EEEE\" may only be used together with digit and decimal point patterns.", + ); + } + + self.flag.insert(NumFlag::EEEE); + Ok(()) + } + + _ => unreachable!(), + } + } +} + +#[cfg(test)] +mod tests { + use crate::number::NumDesc; + + #[test] + fn test_aa() { + let fmt = "9990999.9"; + + let desc = NumDesc::try_new(fmt).unwrap(); + + println!("{:?}", desc); + } +} From 4853c8819b107777daf6b7f86cc5514a8ae3a4a0 Mon Sep 17 00:00:00 2001 From: coldWater Date: Tue, 24 Sep 2024 14:50:40 +0800 Subject: [PATCH 2/9] i32_to_char Signed-off-by: coldWater --- src/common/io/src/number.rs | 772 ++++++++++++++++++++++++++++++++++-- 1 file changed, 740 insertions(+), 32 deletions(-) diff --git a/src/common/io/src/number.rs b/src/common/io/src/number.rs index 65cecb94430d..d0f9cf79083d 100644 --- a/src/common/io/src/number.rs +++ b/src/common/io/src/number.rs @@ -16,12 +16,11 @@ use databend_common_exception::Result; use enumflags2::bitflags; -use enumflags2::make_bitflags; use enumflags2::BitFlags; // https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/formatting.c -#[derive(Clone, Copy)] +#[derive(Debug, Clone, Copy)] enum NumPoz { NumComma, NumDec, @@ -85,12 +84,13 @@ enum NumFlag { EEEE, } +#[derive(Debug, Clone, Copy)] enum NumLSign { Pre, Post, } -#[derive(Clone)] +#[derive(Debug, Clone)] struct KeyWord { name: &'static str, id: NumPoz, @@ -143,6 +143,7 @@ const NUM_KEYWORDS: [KeyWord; 36] = [ KeyWord::new("v", NumPoz::NumV), ]; +#[derive(Debug)] struct FormatNode { typ: NodeType, character: Vec, // if type is CHAR @@ -150,6 +151,7 @@ struct FormatNode { key: KeyWord, // if type is ACTION } +#[derive(Debug)] enum NodeType { End, Action, @@ -161,7 +163,7 @@ enum NodeType { // ---------- // Number description struct // ---------- -#[derive(Default, Debug)] +#[derive(Default, Debug, Clone)] struct NumDesc { pre: usize, // (count) numbers before decimal post: usize, // (count) numbers after decimal @@ -175,29 +177,6 @@ struct NumDesc { } impl NumDesc { - pub fn try_new(mut str: &str) -> Result { - let mut num = NumDesc::default(); - - while !str.is_empty() { - match NUM_KEYWORDS.iter().find(|k| str.starts_with(k.name)) { - Some(k) => { - let n = FormatNode { - typ: NodeType::Action, - character: Vec::new(), - suffix: 0, - key: k.clone(), - }; - - num.prepare(&n)?; - - str = &str[k.name.len()..] - } - None => todo!(), - } - } - Ok(num) - } - fn prepare(&mut self, n: &FormatNode) -> std::result::Result<(), &'static str> { if !matches!(n.typ, NodeType::Action) { return Ok(()); @@ -399,16 +378,745 @@ impl NumDesc { } } +fn parse_format( + mut str: &str, + kw: &[KeyWord], + mut num: Option<&mut NumDesc>, +) -> Result> { + let mut nodes = Vec::new(); + while !str.is_empty() { + match kw.iter().find(|k| str.starts_with(k.name)) { + Some(k) => { + let n = FormatNode { + typ: NodeType::Action, + character: Vec::new(), + suffix: 0, // todo + key: k.clone(), + }; + + if let Some(num) = num.as_mut() { + num.prepare(&n)? + } + str = &str[k.name.len()..]; + + nodes.push(n) + } + None => todo!(), + } + } + Ok(nodes) +} + +struct NumProc { + desc: NumDesc, // number description + + sign: bool, // '-' or '+' + sign_wrote: bool, // was sign write + num_count: usize, // number of write digits + num_in: bool, // is inside number + num_curr: usize, // current position in number + out_pre_spaces: usize, // spaces before first digit + + read_dec: bool, // to_number - was read dec. point + read_post: usize, // to_number - number of dec. digit + read_pre: usize, // to_number - number non-dec. digit + + number: Vec, + number_p: usize, + + inout: String, + + last_relevant: Option<(char, usize)>, // last relevant number after decimal point + + decimal: String, + loc_negative_sign: String, + loc_positive_sign: String, + loc_thousands_sep: String, + loc_currency_symbol: String, +} + +impl NumProc { + // ---------- + // Add digit or sign to number-string + // ---------- + + fn numpart_to_char(&mut self, id: NumPoz) { + // Write sign if real number will write to output Note: IS_PREDEC_SPACE() + // handle "9.9" --> " .1" + if !self.sign_wrote + && (self.num_curr >= self.out_pre_spaces + || self.desc.flag.contains(NumFlag::Zero) && self.desc.zero_start == self.num_curr) + && (!self.is_predec_space() || self.last_relevant.is_some()) + { + if self.desc.flag.contains(NumFlag::LSign) { + if matches!(self.desc.lsign, Some(NumLSign::Pre)) { + if self.sign { + self.inout.push_str(&self.loc_positive_sign) + } else { + self.inout.push_str(&self.loc_negative_sign) + } + self.sign_wrote = true; + } + } else if self.desc.flag.contains(NumFlag::Bracket) { + if self.sign { + self.inout.push(' ') + } else { + self.inout.push('<') + } + self.sign_wrote = true; + } else if self.sign { + if !self.desc.flag.contains(NumFlag::FillMode) { + self.inout.push(' '); /* Write + */ + } + self.sign_wrote = true; + } else { + // Write - + self.inout.push('-'); + self.sign_wrote = true; + } + } + + // if (Np->sign_wrote == false && + // (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) && + // (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.'))) + // { + + // } + + match id { + NumPoz::Num9 | NumPoz::Num0 | NumPoz::NumDec | NumPoz::NumD => { + if self.num_curr < self.out_pre_spaces + && (self.desc.zero_start > self.num_curr + || !self.desc.flag.contains(NumFlag::Zero)) + { + // Write blank space + if !self.desc.flag.contains(NumFlag::FillMode) { + self.inout.push(' ') /* Write ' ' */ + } + } else if self.desc.flag.contains(NumFlag::Zero) + && self.num_curr < self.out_pre_spaces + && self.desc.zero_start <= self.num_curr + { + // Write ZERO + self.inout.push('0'); /* Write '0' */ + self.num_in = true + } else { + // Write Decimal point + if self.number.get(self.number_p).is_some_and(|c| *c == '.') { + if !self.last_relevant_is_dot() { + self.inout.push_str(&self.decimal) /* Write DEC/D */ + } + // Ora 'n' -- FM9.9 --> 'n.' + else if self.desc.flag.contains(NumFlag::FillMode) + && self.last_relevant_is_dot() + { + self.inout.push_str(&self.decimal) /* Write DEC/D */ + } + } else { + if self.last_relevant.is_some_and(|(_, i)| self.number_p > i) + && !matches!(id, NumPoz::Num0) + { + } + // '0.1' -- 9.9 --> ' .1' + else if self.is_predec_space() { + if self.desc.flag.contains(NumFlag::FillMode) { + self.inout.push(' '); + } + // '0' -- FM9.9 --> '0.' + else if self.last_relevant_is_dot() { + self.inout.push('0') + } + } else { + if self.number_p < self.number.len() { + self.inout.push(self.number[self.number_p]); /* Write DIGIT */ + self.num_in = true + } + } + } + if self.number_p < self.number.len() { + self.number_p += 1; + } + } + } + + _ => unimplemented!(), + } + + self.num_curr += 1; + } + + fn is_predec_space(&self) -> bool { + !self.desc.flag.contains(NumFlag::Zero) && + // self.number == self.number_p && + // (_n)->number == (_n)->number_p && \ +// *(_n)->number == '0' && \ + self.desc.post !=0 + } + + fn calc_last_relevant_decnum(&mut self) { + let mut n = None; + for (i, c) in self.number.iter().enumerate() { + match n.as_ref() { + Some(_) if *c != '0' => n = Some(i), + None if *c == '.' => n = Some(i), + _ => {} + } + } + self.last_relevant = n.map(|n| (*self.number.iter().nth(n).unwrap(), n)); + } + + fn last_relevant_is_dot(&self) -> bool { + self.last_relevant.is_some_and(|(c, _)| c == '.') + } +} + +fn num_processor( + nodes: &[FormatNode], + desc: NumDesc, + number: String, + out_pre_spaces: usize, + sign: bool, +) -> String { + let mut np = NumProc { + desc, + sign, + sign_wrote: false, + num_count: 0, + num_in: false, + num_curr: 0, + out_pre_spaces, + read_dec: false, + read_post: 0, + read_pre: 0, + number: number.chars().collect(), + number_p: 0, + inout: String::new(), + last_relevant: None, + decimal: ".".to_string(), + loc_negative_sign: String::new(), + loc_positive_sign: String::new(), + loc_thousands_sep: String::new(), + loc_currency_symbol: String::new(), + }; + + if np.desc.zero_start > 0 { + np.desc.zero_start -= 1; + } + + // MemSet(Np, 0, sizeof(NUMProc)); + + // Np->number = number; + // Np->inout = inout; + // Np->last_relevant = NULL; + // Np->read_post = 0; + // Np->read_pre = 0; + // Np->read_dec = false; + + // if (Np->Num->zero_start) + // --Np->Num->zero_start; + + // if (IS_EEEE(Np->Num)) + // { + // if (!Np->is_to_char) + // ereport(ERROR, + // (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + // errmsg("\"EEEE\" not supported for input"))); + // return strcpy(inout, number); + // } + + // /* + // * Roman correction + // */ + // if (IS_ROMAN(Np->Num)) + // { + // } + + // /* + // * Sign + // */ + // if (is_to_char) + // { + // Np->sign = sign; + + // /* MI/PL/SG - write sign itself and not in number */ + // if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)) + // { + // if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false) + // Np->sign_wrote = false; /* need sign */ + // else + // Np->sign_wrote = true; /* needn't sign */ + // } + // else + // { + if np.sign && np.desc.flag.contains(NumFlag::FillMode) { + // Np->Num->flag &= ~NUM_F_BRACKET; + } + + if np.sign && np.desc.flag.contains(NumFlag::FillMode) && !np.desc.flag.contains(NumFlag::LSign) + { + np.sign_wrote = true /* needn't sign */ + } else { + np.sign_wrote = false /* need sign */ + } + if matches!(np.desc.lsign, Some(NumLSign::Pre)) && np.desc.pre == np.desc.pre_lsign_num { + np.desc.lsign = Some(NumLSign::Post) + } + + // Count + np.num_count = np.desc.post + np.desc.pre - 1; + + // if (is_to_char) + // { + // Np->out_pre_spaces = to_char_out_pre_spaces; + + if np.desc.flag.contains(NumFlag::FillMode) && np.desc.flag.contains(NumFlag::Decimal) { + np.calc_last_relevant_decnum(); + + // /* + // * If any '0' specifiers are present, make sure we don't strip + // * those digits. But don't advance last_relevant beyond the last + // * character of the Np->number string, which is a hazard if the + // * number got shortened due to precision limitations. + // */ + // if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces) + // { + // int last_zero_pos; + // char *last_zero; + + // /* note that Np->number cannot be zero-length here */ + // last_zero_pos = strlen(Np->number) - 1; + // last_zero_pos = Min(last_zero_pos, + // Np->Num->zero_end - Np->out_pre_spaces); + // last_zero = Np->number + last_zero_pos; + // if (Np->last_relevant < last_zero) + // Np->last_relevant = last_zero; + // } + } + + if !np.sign_wrote && np.out_pre_spaces == 0 { + np.num_count += 1; + } + + // /* + // * Locale + // */ + // NUM_prepare_locale(Np); + + // /* + // * Processor direct cycle + // */ + for n in nodes.iter() { + // if (!Np->is_to_char) + // { + // /* + // * Check at least one byte remains to be scanned. (In actions + // * below, must use AMOUNT_TEST if we want to read more bytes than + // * that.) + // */ + // if (OVERLOAD_TEST) + // break; + // } + + // /* + // * Format pictures actions + // */ + // if (n->type == NODE_TYPE_ACTION) + // { + // /* + // * Create/read digit/zero/blank/sign/special-case + // * + // * 'NUM_S' note: The locale sign is anchored to number and we + // * read/write it when we work with first or last number + // * (NUM_0/NUM_9). This is why NUM_S is missing in switch(). + // * + // * Notice the "Np->inout_p++" at the bottom of the loop. This is + // * why most of the actions advance inout_p one less than you might + // * expect. In cases where we don't want that increment to happen, + // * a switch case ends with "continue" not "break". + // */ + match n.key.id { + id @ (NumPoz::Num9 | NumPoz::Num0 | NumPoz::NumDec | NumPoz::NumD) => { + np.numpart_to_char(id) + } + + NumPoz::NumComma => todo!(), + + NumPoz::NumB => todo!(), + NumPoz::NumC => todo!(), + NumPoz::NumE => todo!(), + + _ => unimplemented!(), + } + + // switch (n->key->id) + // { + // case NUM_9: + // case NUM_0: + // case NUM_DEC: + // case NUM_D: + + // case NUM_COMMA: + // if (Np->is_to_char) + // { + // if (!Np->num_in) + // { + // if (IS_FILLMODE(Np->Num)) + // continue; + // else + // *Np->inout_p = ' '; + // } + // else + // *Np->inout_p = ','; + // } + // else + // { + // if (!Np->num_in) + // { + // if (IS_FILLMODE(Np->Num)) + // continue; + // } + // if (*Np->inout_p != ',') + // continue; + // } + // break; + + // case NUM_G: + // pattern = Np->L_thousands_sep; + // pattern_len = strlen(pattern); + // if (Np->is_to_char) + // { + // if (!Np->num_in) + // { + // if (IS_FILLMODE(Np->Num)) + // continue; + // else + // { + // /* just in case there are MB chars */ + // pattern_len = pg_mbstrlen(pattern); + // memset(Np->inout_p, ' ', pattern_len); + // Np->inout_p += pattern_len - 1; + // } + // } + // else + // { + // strcpy(Np->inout_p, pattern); + // Np->inout_p += pattern_len - 1; + // } + // } + // else + // { + // if (!Np->num_in) + // { + // if (IS_FILLMODE(Np->Num)) + // continue; + // } + + // /* + // * Because L_thousands_sep typically contains data + // * characters (either '.' or ','), we can't use + // * NUM_eat_non_data_chars here. Instead skip only if + // * the input matches L_thousands_sep. + // */ + // if (AMOUNT_TEST(pattern_len) && + // strncmp(Np->inout_p, pattern, pattern_len) == 0) + // Np->inout_p += pattern_len - 1; + // else + // continue; + // } + // break; + + // case NUM_L: + // pattern = Np->L_currency_symbol; + // if (Np->is_to_char) + // { + // strcpy(Np->inout_p, pattern); + // Np->inout_p += strlen(pattern) - 1; + // } + // else + // { + // NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len); + // continue; + // } + // break; + + // case NUM_RN: + // if (IS_FILLMODE(Np->Num)) + // { + // strcpy(Np->inout_p, Np->number_p); + // Np->inout_p += strlen(Np->inout_p) - 1; + // } + // else + // { + // sprintf(Np->inout_p, "%15s", Np->number_p); + // Np->inout_p += strlen(Np->inout_p) - 1; + // } + // break; + + // case NUM_rn: + // if (IS_FILLMODE(Np->Num)) + // { + // strcpy(Np->inout_p, asc_tolower_z(Np->number_p)); + // Np->inout_p += strlen(Np->inout_p) - 1; + // } + // else + // { + // sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p)); + // Np->inout_p += strlen(Np->inout_p) - 1; + // } + // break; + + // case NUM_th: + // if (IS_ROMAN(Np->Num) || *Np->number == '#' || + // Np->sign == '-' || IS_DECIMAL(Np->Num)) + // continue; + + // if (Np->is_to_char) + // { + // strcpy(Np->inout_p, get_th(Np->number, TH_LOWER)); + // Np->inout_p += 1; + // } + // else + // { + // /* All variants of 'th' occupy 2 characters */ + // NUM_eat_non_data_chars(Np, 2, input_len); + // continue; + // } + // break; + + // case NUM_TH: + // if (IS_ROMAN(Np->Num) || *Np->number == '#' || + // Np->sign == '-' || IS_DECIMAL(Np->Num)) + // continue; + + // if (Np->is_to_char) + // { + // strcpy(Np->inout_p, get_th(Np->number, TH_UPPER)); + // Np->inout_p += 1; + // } + // else + // { + // /* All variants of 'TH' occupy 2 characters */ + // NUM_eat_non_data_chars(Np, 2, input_len); + // continue; + // } + // break; + + // case NUM_MI: + // if (Np->is_to_char) + // { + // if (Np->sign == '-') + // *Np->inout_p = '-'; + // else if (IS_FILLMODE(Np->Num)) + // continue; + // else + // *Np->inout_p = ' '; + // } + // else + // { + // if (*Np->inout_p == '-') + // *Np->number = '-'; + // else + // { + // NUM_eat_non_data_chars(Np, 1, input_len); + // continue; + // } + // } + // break; + + // case NUM_PL: + // if (Np->is_to_char) + // { + // if (Np->sign == '+') + // *Np->inout_p = '+'; + // else if (IS_FILLMODE(Np->Num)) + // continue; + // else + // *Np->inout_p = ' '; + // } + // else + // { + // if (*Np->inout_p == '+') + // *Np->number = '+'; + // else + // { + // NUM_eat_non_data_chars(Np, 1, input_len); + // continue; + // } + // } + // break; + + // case NUM_SG: + // if (Np->is_to_char) + // *Np->inout_p = Np->sign; + // else + // { + // if (*Np->inout_p == '-') + // *Np->number = '-'; + // else if (*Np->inout_p == '+') + // *Np->number = '+'; + // else + // { + // NUM_eat_non_data_chars(Np, 1, input_len); + // continue; + // } + // } + // break; + + // default: + // continue; + // break; + // } + // } + // else + // { + // /* + // * In TO_CHAR, non-pattern characters in the format are copied to + // * the output. In TO_NUMBER, we skip one input character for each + // * non-pattern format character, whether or not it matches the + // * format character. + // */ + // if (Np->is_to_char) + // { + // strcpy(Np->inout_p, n->character); + // Np->inout_p += strlen(Np->inout_p); + // } + // else + // { + // Np->inout_p += pg_mblen(Np->inout_p); + // } + // continue; + // } + // Np->inout_p++; + } + + // if (Np->is_to_char) + // { + // *Np->inout_p = '\0'; + // return Np->inout; + // } + // else + // { + // if (*(Np->number_p - 1) == '.') + // *(Np->number_p - 1) = '\0'; + // else + // *Np->number_p = '\0'; + + // /* + // * Correction - precision of dec. number + // */ + // Np->Num->post = Np->read_post; + + // #ifdef DEBUG_TO_FROM_CHAR + // elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number); + // #endif + // return Np->number; + // } + + np.inout +} + +fn i32_to_char(value: i32, fmt: &str) -> Result { + let mut desc = NumDesc::default(); + let nodes = parse_format(fmt, &NUM_KEYWORDS, Some(&mut desc)).unwrap(); + + let sign = value >= 0; + let (numstr, out_pre_spaces) = if desc.flag.contains(NumFlag::Roman) { + unimplemented!() + } else if desc.flag.contains(NumFlag::EEEE) { + // we can do it easily because f32 won't lose any precision + let orgnum = format!("{:+.*e}", desc.post, value as f32); + + // Swap a leading positive sign for a space. + let orgnum = orgnum.replace("+", "_"); + + (orgnum, 0) + } else { + let mut orgnum = if desc.flag.contains(NumFlag::Multi) { + todo!(); + // orgnum = DatumGetCString(DirectFunctionCall1(int4out, + // Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi))))); + // desc.pre += desc.multi; + } else { + format!("{}", value.abs()) + }; + + let numstr_pre_len = orgnum.len(); + + // post-decimal digits? Pad out with zeros. + if desc.post > 0 { + orgnum.push('.'); + orgnum.push_str(&"0".repeat(desc.post)) + } + + match numstr_pre_len.cmp(&desc.pre) { + // needs padding? + std::cmp::Ordering::Less => (orgnum, desc.pre - numstr_pre_len), + std::cmp::Ordering::Equal => (orgnum, 0), + std::cmp::Ordering::Greater => { + // overflowed prefix digit format? + (["#".repeat(desc.pre), "#".repeat(desc.post)].join("."), 0) + } + } + }; + + Ok(num_processor(&nodes, desc, numstr, out_pre_spaces, sign)) +} + #[cfg(test)] mod tests { - use crate::number::NumDesc; + use super::*; #[test] - fn test_aa() { - let fmt = "9990999.9"; + fn test_i32() -> Result<()> { + assert_eq!(" 123", i32_to_char(123, "999")?); + assert_eq!("-123", i32_to_char(-123, "999")?); + + assert_eq!(" 0123", i32_to_char(123, "0999")?); + assert_eq!("-0123", i32_to_char(-123, "0999")?); + + assert_eq!(" 123", i32_to_char(123, "99999")?); + assert_eq!(" -123", i32_to_char(-123, "99999")?); + + assert_eq!(" 0123", i32_to_char(123, "9990999")?); + assert_eq!(" -0123", i32_to_char(-123, "9990999")?); + + assert_eq!(" 12345", i32_to_char(12345, "9990999")?); + assert_eq!(" -12345", i32_to_char(-12345, "9990999")?); + + assert_eq!(" ##", i32_to_char(123, "99")?); + assert_eq!("-##", i32_to_char(-123, "99")?); + + assert_eq!(" ##.", i32_to_char(123, "99.")?); + assert_eq!("-##.", i32_to_char(-123, "99.")?); - let desc = NumDesc::try_new(fmt).unwrap(); + // assert_eq!(" ##.#",i32_to_char(123,"99.0")?); + // assert_eq!("-##.#",i32_to_char(-123,"99.0")?); - println!("{:?}", desc); + Ok(()) + } + + fn run_test(num: &str, fmt: &str, sign: bool) { + let mut desc = NumDesc::default(); + let nodes = parse_format(fmt, &NUM_KEYWORDS, Some(&mut desc)).unwrap(); + + let numstr = num.to_string(); + + let numstr_pre_len = match numstr.find('.') { + Some(i) => i, + None => numstr.len(), + }; + + let out_pre_spaces = if numstr_pre_len < desc.pre { + desc.pre - numstr_pre_len + } else { + 0 + }; + + let out = num_processor(&nodes, desc, numstr, out_pre_spaces, sign); + + println!("{out:?}") } } + +// 123 '9990999' 0123 +// 123 '99900999.999' 0123.000 From c3925e78baf6acdd878a3d79d8975d4798f3d4c1 Mon Sep 17 00:00:00 2001 From: coldWater Date: Tue, 24 Sep 2024 16:32:17 +0800 Subject: [PATCH 3/9] clean up Signed-off-by: coldWater --- src/common/io/src/number.rs | 1146 ++++++++++++----------------------- 1 file changed, 397 insertions(+), 749 deletions(-) diff --git a/src/common/io/src/number.rs b/src/common/io/src/number.rs index d0f9cf79083d..48a1232f7cc1 100644 --- a/src/common/io/src/number.rs +++ b/src/common/io/src/number.rs @@ -12,58 +12,105 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Template Patterns for Numeric Formatting - +use databend_common_exception::ErrorCode; use databend_common_exception::Result; use enumflags2::bitflags; use enumflags2::BitFlags; +// Template Patterns for Numeric Formatting // https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/formatting.c +#[derive(Debug, Clone)] +struct KeyWord { + name: &'static str, + id: NumPoz, + // is_digit: bool, + // date_mode: FromCharDateMode, +} + #[derive(Debug, Clone, Copy)] +#[expect(dead_code)] enum NumPoz { - NumComma, - NumDec, - Num0, - Num9, - NumB, - NumC, - NumD, - NumE, - NumFM, - NumG, - NumL, - NumMI, - NumPL, - NumPR, - NumRN, - NumSG, - NumSP, - NumS, - NumTH, - NumV, - Numb, - Numc, - Numd, - Nume, - Numfm, - Numg, - Numl, - Nummi, - Numpl, - Numpr, - Numrn, - Numsg, - Numsp, - Nums, - Numth, - Numv, + TkComma, + TkDec, + Tk0, + Tk9, + TkB, + TkC, + TkD, + TkE, + TkFM, + TkG, + TkL, + TkMI, + TkPL, + TkPR, + TkRN, + TkSG, + TkSP, + TkS, + TkTH, + TkV, + Tkb, + Tkc, + Tkd, + Tke, + Tkfm, + Tkg, + Tkl, + Tkmi, + Tkpl, + Tkpr, + Tkrn, + Tksg, + Tksp, + Tks, + Tkth, + Tkv, } +const NUM_KEYWORDS: [KeyWord; 36] = [ + KeyWord::new(",", NumPoz::TkComma), + KeyWord::new(".", NumPoz::TkDec), + KeyWord::new("0", NumPoz::Tk0), + KeyWord::new("9", NumPoz::Tk9), + KeyWord::new("B", NumPoz::TkB), + KeyWord::new("C", NumPoz::TkC), + KeyWord::new("D", NumPoz::TkD), + KeyWord::new("EEEE", NumPoz::TkE), + KeyWord::new("FM", NumPoz::TkFM), + KeyWord::new("G", NumPoz::TkG), + KeyWord::new("L", NumPoz::TkL), + KeyWord::new("MI", NumPoz::TkMI), + KeyWord::new("PL", NumPoz::TkPL), + KeyWord::new("PR", NumPoz::TkPR), + KeyWord::new("RN", NumPoz::TkRN), + KeyWord::new("SG", NumPoz::TkSG), + KeyWord::new("SP", NumPoz::TkSP), + KeyWord::new("S", NumPoz::TkS), + KeyWord::new("TH", NumPoz::TkTH), + KeyWord::new("V", NumPoz::TkV), + KeyWord::new("b", NumPoz::Tkb), + KeyWord::new("c", NumPoz::Tkc), + KeyWord::new("d", NumPoz::TkD), + KeyWord::new("eeee", NumPoz::TkE), + KeyWord::new("fm", NumPoz::TkFM), + KeyWord::new("g", NumPoz::TkG), + KeyWord::new("l", NumPoz::TkL), + KeyWord::new("mi", NumPoz::TkMI), + KeyWord::new("pl", NumPoz::TkPL), + KeyWord::new("pr", NumPoz::TkPR), + KeyWord::new("rn", NumPoz::Tkrn), + KeyWord::new("sg", NumPoz::TkSG), + KeyWord::new("sp", NumPoz::TkSP), + KeyWord::new("s", NumPoz::TkS), + KeyWord::new("th", NumPoz::Tkth), + KeyWord::new("v", NumPoz::TkV), +]; + // ---------- // Flags for NUMBER version // ---------- - #[bitflags] #[repr(u16)] #[derive(Clone, Copy, Debug)] @@ -90,72 +137,19 @@ enum NumLSign { Post, } -#[derive(Debug, Clone)] -struct KeyWord { - name: &'static str, - id: NumPoz, - // is_digit: bool, - // date_mode: FromCharDateMode, -} - impl KeyWord { const fn new(name: &'static str, id: NumPoz) -> KeyWord { KeyWord { name, id } } } -const NUM_KEYWORDS: [KeyWord; 36] = [ - KeyWord::new(",", NumPoz::NumComma), - KeyWord::new(".", NumPoz::NumDec), - KeyWord::new("0", NumPoz::Num0), - KeyWord::new("9", NumPoz::Num9), - KeyWord::new("B", NumPoz::NumB), - KeyWord::new("C", NumPoz::NumC), - KeyWord::new("D", NumPoz::NumD), - KeyWord::new("EEEE", NumPoz::NumE), - KeyWord::new("FM", NumPoz::NumFM), - KeyWord::new("G", NumPoz::NumG), - KeyWord::new("L", NumPoz::NumL), - KeyWord::new("MI", NumPoz::NumMI), - KeyWord::new("PL", NumPoz::NumPL), - KeyWord::new("PR", NumPoz::NumPR), - KeyWord::new("RN", NumPoz::NumRN), - KeyWord::new("SG", NumPoz::NumSG), - KeyWord::new("SP", NumPoz::NumSP), - KeyWord::new("S", NumPoz::NumS), - KeyWord::new("TH", NumPoz::NumTH), - KeyWord::new("V", NumPoz::NumV), - KeyWord::new("b", NumPoz::NumB), - KeyWord::new("c", NumPoz::Numc), - KeyWord::new("d", NumPoz::NumD), - KeyWord::new("eeee", NumPoz::NumE), - KeyWord::new("fm", NumPoz::NumFM), - KeyWord::new("g", NumPoz::NumG), - KeyWord::new("l", NumPoz::NumL), - KeyWord::new("mi", NumPoz::NumMI), - KeyWord::new("pl", NumPoz::NumPL), - KeyWord::new("pr", NumPoz::NumPR), - KeyWord::new("rn", NumPoz::Numrn), - KeyWord::new("sg", NumPoz::NumSG), - KeyWord::new("sp", NumPoz::NumSP), - KeyWord::new("s", NumPoz::NumS), - KeyWord::new("th", NumPoz::Numth), - KeyWord::new("v", NumPoz::NumV), -]; - #[derive(Debug)] -struct FormatNode { - typ: NodeType, - character: Vec, // if type is CHAR - suffix: u8, // keyword prefix/suffix code, if any - key: KeyWord, // if type is ACTION -} +#[expect(dead_code)] -#[derive(Debug)] -enum NodeType { +enum FormatNode { End, - Action, - Char, + Action(KeyWord), + Char(Vec), Separator, Space, } @@ -178,202 +172,202 @@ struct NumDesc { impl NumDesc { fn prepare(&mut self, n: &FormatNode) -> std::result::Result<(), &'static str> { - if !matches!(n.typ, NodeType::Action) { - return Ok(()); - } + if let FormatNode::Action(key) = n { + if self.flag.contains(NumFlag::EEEE) && !matches!(key.id, NumPoz::TkE) { + return Err("\"EEEE\" must be the last pattern used"); + } - if self.flag.contains(NumFlag::EEEE) && !matches!(n.key.id, NumPoz::NumE) { - return Err("\"EEEE\" must be the last pattern used"); - } + match key.id { + NumPoz::Tk9 => { + if self.flag.contains(NumFlag::Bracket) { + return Err("\"9\" must be ahead of \"PR\""); + } - match n.key.id { - NumPoz::Num9 => { - if self.flag.contains(NumFlag::Bracket) { - return Err("\"9\" must be ahead of \"PR\""); - } + if self.flag.contains(NumFlag::Multi) { + self.multi += 1; + return Ok(()); + } - if self.flag.contains(NumFlag::Multi) { - self.multi += 1; + if self.flag.contains(NumFlag::Decimal) { + self.post += 1; + } else { + self.pre += 1; + } return Ok(()); } - if self.flag.contains(NumFlag::Decimal) { - self.post += 1; - } else { - self.pre += 1; - } - return Ok(()); - } + NumPoz::Tk0 => { + if self.flag.contains(NumFlag::Bracket) { + return Err("\"0\" must be ahead of \"PR\""); + } - NumPoz::Num0 => { - if self.flag.contains(NumFlag::Bracket) { - return Err("\"0\" must be ahead of \"PR\""); - } + if !self.flag.intersects(NumFlag::Zero | NumFlag::Decimal) { + self.flag.insert(NumFlag::Zero); + self.zero_start = self.pre + 1; + } - if !self.flag.intersects(NumFlag::Zero | NumFlag::Decimal) { - self.flag.insert(NumFlag::Zero); - self.zero_start = self.pre + 1; - } + if !self.flag.contains(NumFlag::Decimal) { + self.pre += 1; + } else { + self.post += 1; + } - if !self.flag.contains(NumFlag::Decimal) { - self.pre += 1; - } else { - self.post += 1; + self.zero_end = self.pre + self.post; + Ok(()) } - self.zero_end = self.pre + self.post; - Ok(()) - } - - NumPoz::NumB => { - if self.pre == 0 && self.post == 0 && !self.flag.contains(NumFlag::Zero) { - self.flag.insert(NumFlag::Blank) + NumPoz::TkB => { + if self.pre == 0 && self.post == 0 && !self.flag.contains(NumFlag::Zero) { + self.flag.insert(NumFlag::Blank) + } + Ok(()) } - Ok(()) - } - NumPoz::NumD => { - self.flag.insert(NumFlag::LDecimal); - self.need_locale = true; + NumPoz::TkD => { + self.flag.insert(NumFlag::LDecimal); + self.need_locale = true; - if self.flag.contains(NumFlag::Decimal) { - return Err("multiple decimal points"); - } - if self.flag.contains(NumFlag::Multi) { - return Err("cannot use \"V\" and decimal point together"); + if self.flag.contains(NumFlag::Decimal) { + return Err("multiple decimal points"); + } + if self.flag.contains(NumFlag::Multi) { + return Err("cannot use \"V\" and decimal point together"); + } + + self.flag.insert(NumFlag::Decimal); + Ok(()) } - self.flag.insert(NumFlag::Decimal); - Ok(()) - } + NumPoz::TkDec => { + if self.flag.contains(NumFlag::Decimal) { + return Err("multiple decimal points"); + } + if self.flag.contains(NumFlag::Multi) { + return Err("cannot use \"V\" and decimal point together"); + } - NumPoz::NumDec => { - if self.flag.contains(NumFlag::Decimal) { - return Err("multiple decimal points"); - } - if self.flag.contains(NumFlag::Multi) { - return Err("cannot use \"V\" and decimal point together"); + self.flag.insert(NumFlag::Decimal); + Ok(()) } - self.flag.insert(NumFlag::Decimal); - Ok(()) - } + NumPoz::TkFM => { + self.flag.insert(NumFlag::FillMode); + Ok(()) + } - NumPoz::NumFM => { - self.flag.insert(NumFlag::FillMode); - Ok(()) - } + NumPoz::TkS => { + if self.flag.contains(NumFlag::LSign) { + return Err("cannot use \"S\" twice"); + } + if self + .flag + .intersects(NumFlag::Plus | NumFlag::Minus | NumFlag::Bracket) + { + return Err("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together"); + } - NumPoz::NumS => { - if self.flag.contains(NumFlag::LSign) { - return Err("cannot use \"S\" twice"); - } - if self - .flag - .intersects(NumFlag::Plus | NumFlag::Minus | NumFlag::Bracket) - { - return Err("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together"); - } + if self.flag.contains(NumFlag::Decimal) { + self.lsign = Some(NumLSign::Pre); + self.pre_lsign_num = self.pre; + self.need_locale = true; + self.flag.insert(NumFlag::LSign); + return Ok(()); + } - if self.flag.contains(NumFlag::Decimal) { - self.lsign = Some(NumLSign::Pre); - self.pre_lsign_num = self.pre; - self.need_locale = true; - self.flag.insert(NumFlag::LSign); - return Ok(()); + if self.lsign.is_none() { + self.lsign = Some(NumLSign::Post); + self.need_locale = true; + self.flag.insert(NumFlag::LSign); + } + Ok(()) } - if self.lsign.is_none() { - self.lsign = Some(NumLSign::Post); - self.need_locale = true; - self.flag.insert(NumFlag::LSign); - } - Ok(()) - } + NumPoz::TkMI => { + if self.flag.contains(NumFlag::LSign) { + return Err("cannot use \"S\" and \"MI\" together"); + } - NumPoz::NumMI => { - if self.flag.contains(NumFlag::LSign) { - return Err("cannot use \"S\" and \"MI\" together"); + self.flag.insert(NumFlag::Minus); + if self.flag.contains(NumFlag::Decimal) { + self.flag.insert(NumFlag::MinusPost) + } + Ok(()) } + NumPoz::TkPL => { + if self.flag.contains(NumFlag::LSign) { + return Err("cannot use \"S\" and \"PL\" together"); + } - self.flag.insert(NumFlag::Minus); - if self.flag.contains(NumFlag::Decimal) { - self.flag.insert(NumFlag::MinusPost) + self.flag.insert(NumFlag::Plus); + if self.flag.contains(NumFlag::Decimal) { + self.flag.insert(NumFlag::PlusPost) + } + Ok(()) } - Ok(()) - } - NumPoz::NumPL => { - if self.flag.contains(NumFlag::LSign) { - return Err("cannot use \"S\" and \"PL\" together"); + NumPoz::TkSG => { + if self.flag.contains(NumFlag::LSign) { + return Err("cannot use \"S\" and \"SG\" together"); + } + self.flag.insert(NumFlag::Plus | NumFlag::Minus); + Ok(()) } + NumPoz::TkPR => { + if self + .flag + .intersects(NumFlag::LSign | NumFlag::Plus | NumFlag::Minus) + { + return Err("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together"); + } - self.flag.insert(NumFlag::Plus); - if self.flag.contains(NumFlag::Decimal) { - self.flag.insert(NumFlag::PlusPost) - } - Ok(()) - } - NumPoz::NumSG => { - if self.flag.contains(NumFlag::LSign) { - return Err("cannot use \"S\" and \"SG\" together"); + self.flag.insert(NumFlag::Bracket); + Ok(()) } - self.flag.insert(NumFlag::Plus | NumFlag::Minus); - Ok(()) - } - NumPoz::NumPR => { - if self - .flag - .intersects(NumFlag::LSign | NumFlag::Plus | NumFlag::Minus) - { - return Err("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together"); + NumPoz::Tkrn | NumPoz::TkRN => { + self.flag.insert(NumFlag::Roman); + Ok(()) } - self.flag.insert(NumFlag::Bracket); - Ok(()) - } - NumPoz::Numrn | NumPoz::NumRN => { - self.flag.insert(NumFlag::Roman); - Ok(()) - } - - NumPoz::NumL | NumPoz::NumG => { - self.need_locale = true; - Ok(()) - } - - NumPoz::NumV => { - if self.flag.contains(NumFlag::Decimal) { - return Err("cannot use \"V\" and decimal point together"); + NumPoz::TkL | NumPoz::TkG => { + self.need_locale = true; + Ok(()) } - self.flag.insert(NumFlag::Multi); - Ok(()) - } - NumPoz::NumE => { - if self.flag.contains(NumFlag::EEEE) { - return Err("cannot use \"EEEE\" twice"); + NumPoz::TkV => { + if self.flag.contains(NumFlag::Decimal) { + return Err("cannot use \"V\" and decimal point together"); + } + self.flag.insert(NumFlag::Multi); + Ok(()) } - if self.flag.intersects( - NumFlag::Blank - | NumFlag::FillMode - | NumFlag::LSign - | NumFlag::Bracket - | NumFlag::Minus - | NumFlag::Plus - | NumFlag::Roman - | NumFlag::Multi, - ) { - return Err( - "\"EEEE\" may only be used together with digit and decimal point patterns.", - ); + NumPoz::TkE => { + if self.flag.contains(NumFlag::EEEE) { + return Err("cannot use \"EEEE\" twice"); + } + + if self.flag.intersects( + NumFlag::Blank + | NumFlag::FillMode + | NumFlag::LSign + | NumFlag::Bracket + | NumFlag::Minus + | NumFlag::Plus + | NumFlag::Roman + | NumFlag::Multi, + ) { + return Err( + "\"EEEE\" may only be used together with digit and decimal point patterns.", + ); + } + + self.flag.insert(NumFlag::EEEE); + Ok(()) } - self.flag.insert(NumFlag::EEEE); - Ok(()) + _ => unreachable!(), } - - _ => unreachable!(), + } else { + unreachable!() } } } @@ -387,21 +381,18 @@ fn parse_format( while !str.is_empty() { match kw.iter().find(|k| str.starts_with(k.name)) { Some(k) => { - let n = FormatNode { - typ: NodeType::Action, - character: Vec::new(), - suffix: 0, // todo - key: k.clone(), - }; + let n = FormatNode::Action(k.clone()); if let Some(num) = num.as_mut() { - num.prepare(&n)? + num.prepare(&n).map_err(ErrorCode::SyntaxException)?; } str = &str[k.name.len()..]; nodes.push(n) } - None => todo!(), + None => Err(ErrorCode::SyntaxException( + "Currently only key words are supported".to_string(), + ))?, } } Ok(nodes) @@ -417,9 +408,9 @@ struct NumProc { num_curr: usize, // current position in number out_pre_spaces: usize, // spaces before first digit - read_dec: bool, // to_number - was read dec. point - read_post: usize, // to_number - number of dec. digit - read_pre: usize, // to_number - number non-dec. digit + _read_dec: bool, // to_number - was read dec. point + _read_post: usize, // to_number - number of dec. digit + _read_pre: usize, // to_number - number non-dec. digit number: Vec, number_p: usize, @@ -431,15 +422,14 @@ struct NumProc { decimal: String, loc_negative_sign: String, loc_positive_sign: String, - loc_thousands_sep: String, - loc_currency_symbol: String, + _loc_thousands_sep: String, + _loc_currency_symbol: String, } impl NumProc { // ---------- // Add digit or sign to number-string // ---------- - fn numpart_to_char(&mut self, id: NumPoz) { // Write sign if real number will write to output Note: IS_PREDEC_SPACE() // handle "9.9" --> " .1" @@ -470,87 +460,102 @@ impl NumProc { } self.sign_wrote = true; } else { - // Write - - self.inout.push('-'); + self.inout.push('-'); /* Write - */ self.sign_wrote = true; } } - // if (Np->sign_wrote == false && - // (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) && - // (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.'))) - // { - - // } - - match id { - NumPoz::Num9 | NumPoz::Num0 | NumPoz::NumDec | NumPoz::NumD => { - if self.num_curr < self.out_pre_spaces - && (self.desc.zero_start > self.num_curr - || !self.desc.flag.contains(NumFlag::Zero)) - { - // Write blank space - if !self.desc.flag.contains(NumFlag::FillMode) { - self.inout.push(' ') /* Write ' ' */ + // digits / FM / Zero / Dec. point + if matches!(id, NumPoz::Tk9 | NumPoz::Tk0 | NumPoz::TkDec | NumPoz::TkD) { + if self.num_curr < self.out_pre_spaces + && (self.desc.zero_start > self.num_curr || !self.desc.flag.contains(NumFlag::Zero)) + { + // Write blank space + if !self.desc.flag.contains(NumFlag::FillMode) { + self.inout.push(' ') /* Write ' ' */ + } + } else if self.desc.flag.contains(NumFlag::Zero) + && self.num_curr < self.out_pre_spaces + && self.desc.zero_start <= self.num_curr + { + // Write ZERO + self.inout.push('0'); /* Write '0' */ + self.num_in = true + } else { + // Write Decimal point + if self.number.get(self.number_p).is_some_and(|c| *c == '.') { + if !self.last_relevant_is_dot() { + self.inout.push_str(&self.decimal) /* Write DEC/D */ + } + // Ora 'n' -- FM9.9 --> 'n.' + else if self.desc.flag.contains(NumFlag::FillMode) + && self.last_relevant_is_dot() + { + self.inout.push_str(&self.decimal) /* Write DEC/D */ } - } else if self.desc.flag.contains(NumFlag::Zero) - && self.num_curr < self.out_pre_spaces - && self.desc.zero_start <= self.num_curr - { - // Write ZERO - self.inout.push('0'); /* Write '0' */ - self.num_in = true } else { - // Write Decimal point - if self.number.get(self.number_p).is_some_and(|c| *c == '.') { - if !self.last_relevant_is_dot() { - self.inout.push_str(&self.decimal) /* Write DEC/D */ + if self.last_relevant.is_some_and(|(_, i)| self.number_p > i) + && !matches!(id, NumPoz::Tk0) + { + } + // '0.1' -- 9.9 --> ' .1' + else if self.is_predec_space() { + if self.desc.flag.contains(NumFlag::FillMode) { + self.inout.push(' '); } - // Ora 'n' -- FM9.9 --> 'n.' - else if self.desc.flag.contains(NumFlag::FillMode) - && self.last_relevant_is_dot() - { - self.inout.push_str(&self.decimal) /* Write DEC/D */ + // '0' -- FM9.9 --> '0.' + else if self.last_relevant_is_dot() { + self.inout.push('0') } } else { - if self.last_relevant.is_some_and(|(_, i)| self.number_p > i) - && !matches!(id, NumPoz::Num0) - { - } - // '0.1' -- 9.9 --> ' .1' - else if self.is_predec_space() { - if self.desc.flag.contains(NumFlag::FillMode) { - self.inout.push(' '); - } - // '0' -- FM9.9 --> '0.' - else if self.last_relevant_is_dot() { - self.inout.push('0') - } - } else { - if self.number_p < self.number.len() { - self.inout.push(self.number[self.number_p]); /* Write DIGIT */ - self.num_in = true - } + if self.number_p < self.number.len() { + self.inout.push(self.number[self.number_p]); /* Write DIGIT */ + self.num_in = true } } - if self.number_p < self.number.len() { - self.number_p += 1; - } + } + if self.number_p < self.number.len() { + self.number_p += 1; } } - _ => unimplemented!(), + let end = self.num_count + + if self.out_pre_spaces > 0 { 1 } else { 0 } + + if self.desc.flag.contains(NumFlag::Decimal) { + 1 + } else { + 0 + }; + + let end = if self.last_relevant.is_some_and(|(_, i)| i == self.number_p) { + self.num_curr + } else { + end + }; + + if self.num_curr + 1 == end { + if self.sign_wrote && self.desc.flag.contains(NumFlag::Bracket) { + self.inout.push(if self.sign { ' ' } else { '>' }) + } else if self.desc.flag.contains(NumFlag::LSign) + && matches!(self.desc.lsign, Some(NumLSign::Post)) + { + self.inout.push_str(if self.sign { + &self.loc_positive_sign + } else { + &self.loc_negative_sign + }) + } + } } self.num_curr += 1; } fn is_predec_space(&self) -> bool { - !self.desc.flag.contains(NumFlag::Zero) && - // self.number == self.number_p && - // (_n)->number == (_n)->number_p && \ -// *(_n)->number == '0' && \ - self.desc.post !=0 + !self.desc.flag.contains(NumFlag::Zero) + && self.number_p == 0 + && self.number[0] == '0' + && self.desc.post != 0 } fn calc_last_relevant_decnum(&mut self) { @@ -585,9 +590,9 @@ fn num_processor( num_in: false, num_curr: 0, out_pre_spaces, - read_dec: false, - read_post: 0, - read_pre: 0, + _read_dec: false, + _read_post: 0, + _read_pre: 0, number: number.chars().collect(), number_p: 0, inout: String::new(), @@ -595,429 +600,98 @@ fn num_processor( decimal: ".".to_string(), loc_negative_sign: String::new(), loc_positive_sign: String::new(), - loc_thousands_sep: String::new(), - loc_currency_symbol: String::new(), + _loc_thousands_sep: String::new(), + _loc_currency_symbol: String::new(), }; if np.desc.zero_start > 0 { np.desc.zero_start -= 1; } - // MemSet(Np, 0, sizeof(NUMProc)); - - // Np->number = number; - // Np->inout = inout; - // Np->last_relevant = NULL; - // Np->read_post = 0; - // Np->read_pre = 0; - // Np->read_dec = false; - - // if (Np->Num->zero_start) - // --Np->Num->zero_start; - - // if (IS_EEEE(Np->Num)) - // { - // if (!Np->is_to_char) - // ereport(ERROR, - // (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - // errmsg("\"EEEE\" not supported for input"))); - // return strcpy(inout, number); - // } - - // /* - // * Roman correction - // */ - // if (IS_ROMAN(Np->Num)) - // { - // } - - // /* - // * Sign - // */ - // if (is_to_char) - // { - // Np->sign = sign; - - // /* MI/PL/SG - write sign itself and not in number */ - // if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)) - // { - // if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false) - // Np->sign_wrote = false; /* need sign */ - // else - // Np->sign_wrote = true; /* needn't sign */ - // } - // else - // { - if np.sign && np.desc.flag.contains(NumFlag::FillMode) { - // Np->Num->flag &= ~NUM_F_BRACKET; + // Roman correction + if np.desc.flag.contains(NumFlag::Roman) { + unimplemented!() } - if np.sign && np.desc.flag.contains(NumFlag::FillMode) && !np.desc.flag.contains(NumFlag::LSign) - { - np.sign_wrote = true /* needn't sign */ + // Sign + + // MI/PL/SG - write sign itself and not in number + if np.desc.flag.contains(NumFlag::Plus | NumFlag::Minus) { + if np.desc.flag.contains(NumFlag::Plus) && !np.desc.flag.contains(NumFlag::Minus) { + np.sign_wrote = false; /* need sign */ + } else { + np.sign_wrote = true; /* needn't sign */ + } } else { - np.sign_wrote = false /* need sign */ - } - if matches!(np.desc.lsign, Some(NumLSign::Pre)) && np.desc.pre == np.desc.pre_lsign_num { - np.desc.lsign = Some(NumLSign::Post) + if np.sign && np.desc.flag.contains(NumFlag::FillMode) { + np.desc.flag.remove(NumFlag::Bracket) + } + + if np.sign + && np.desc.flag.contains(NumFlag::FillMode) + && !np.desc.flag.contains(NumFlag::LSign) + { + np.sign_wrote = true /* needn't sign */ + } else { + np.sign_wrote = false /* need sign */ + } + if matches!(np.desc.lsign, Some(NumLSign::Pre)) && np.desc.pre == np.desc.pre_lsign_num { + np.desc.lsign = Some(NumLSign::Post) + } } // Count np.num_count = np.desc.post + np.desc.pre - 1; - // if (is_to_char) - // { - // Np->out_pre_spaces = to_char_out_pre_spaces; - if np.desc.flag.contains(NumFlag::FillMode) && np.desc.flag.contains(NumFlag::Decimal) { np.calc_last_relevant_decnum(); - // /* - // * If any '0' specifiers are present, make sure we don't strip - // * those digits. But don't advance last_relevant beyond the last - // * character of the Np->number string, which is a hazard if the - // * number got shortened due to precision limitations. - // */ - // if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces) - // { - // int last_zero_pos; - // char *last_zero; - - // /* note that Np->number cannot be zero-length here */ - // last_zero_pos = strlen(Np->number) - 1; - // last_zero_pos = Min(last_zero_pos, - // Np->Num->zero_end - Np->out_pre_spaces); - // last_zero = Np->number + last_zero_pos; - // if (Np->last_relevant < last_zero) - // Np->last_relevant = last_zero; - // } + // If any '0' specifiers are present, make sure we don't strip + // those digits. But don't advance last_relevant beyond the last + // character of the np.number string, which is a hazard if the + // number got shortened due to precision limitations. + if let Some(last_relevant) = np.last_relevant { + if np.desc.zero_end > np.out_pre_spaces { + // note that np.number cannot be zero-length here + let last_zero_pos = np.number.len() - 1; + let last_zero_pos = last_zero_pos.min(np.desc.zero_end - np.out_pre_spaces); + + if last_relevant.1 < last_zero_pos { + let ch = np.number[last_zero_pos]; + np.last_relevant = Some((ch, last_zero_pos)) + } + } + } } if !np.sign_wrote && np.out_pre_spaces == 0 { np.num_count += 1; } - // /* - // * Locale - // */ + // Locale // NUM_prepare_locale(Np); - // /* - // * Processor direct cycle - // */ + // Processor direct cycle for n in nodes.iter() { - // if (!Np->is_to_char) - // { - // /* - // * Check at least one byte remains to be scanned. (In actions - // * below, must use AMOUNT_TEST if we want to read more bytes than - // * that.) - // */ - // if (OVERLOAD_TEST) - // break; - // } - - // /* - // * Format pictures actions - // */ - // if (n->type == NODE_TYPE_ACTION) - // { - // /* - // * Create/read digit/zero/blank/sign/special-case - // * - // * 'NUM_S' note: The locale sign is anchored to number and we - // * read/write it when we work with first or last number - // * (NUM_0/NUM_9). This is why NUM_S is missing in switch(). - // * - // * Notice the "Np->inout_p++" at the bottom of the loop. This is - // * why most of the actions advance inout_p one less than you might - // * expect. In cases where we don't want that increment to happen, - // * a switch case ends with "continue" not "break". - // */ - match n.key.id { - id @ (NumPoz::Num9 | NumPoz::Num0 | NumPoz::NumDec | NumPoz::NumD) => { - np.numpart_to_char(id) - } - - NumPoz::NumComma => todo!(), - - NumPoz::NumB => todo!(), - NumPoz::NumC => todo!(), - NumPoz::NumE => todo!(), - + match n { + // Format pictures actions + FormatNode::Action(key) => match key.id { + id @ (NumPoz::Tk9 | NumPoz::Tk0 | NumPoz::TkDec | NumPoz::TkD) => { + np.numpart_to_char(id) + } + _ => unimplemented!(), + }, + FormatNode::End => break, _ => unimplemented!(), } - - // switch (n->key->id) - // { - // case NUM_9: - // case NUM_0: - // case NUM_DEC: - // case NUM_D: - - // case NUM_COMMA: - // if (Np->is_to_char) - // { - // if (!Np->num_in) - // { - // if (IS_FILLMODE(Np->Num)) - // continue; - // else - // *Np->inout_p = ' '; - // } - // else - // *Np->inout_p = ','; - // } - // else - // { - // if (!Np->num_in) - // { - // if (IS_FILLMODE(Np->Num)) - // continue; - // } - // if (*Np->inout_p != ',') - // continue; - // } - // break; - - // case NUM_G: - // pattern = Np->L_thousands_sep; - // pattern_len = strlen(pattern); - // if (Np->is_to_char) - // { - // if (!Np->num_in) - // { - // if (IS_FILLMODE(Np->Num)) - // continue; - // else - // { - // /* just in case there are MB chars */ - // pattern_len = pg_mbstrlen(pattern); - // memset(Np->inout_p, ' ', pattern_len); - // Np->inout_p += pattern_len - 1; - // } - // } - // else - // { - // strcpy(Np->inout_p, pattern); - // Np->inout_p += pattern_len - 1; - // } - // } - // else - // { - // if (!Np->num_in) - // { - // if (IS_FILLMODE(Np->Num)) - // continue; - // } - - // /* - // * Because L_thousands_sep typically contains data - // * characters (either '.' or ','), we can't use - // * NUM_eat_non_data_chars here. Instead skip only if - // * the input matches L_thousands_sep. - // */ - // if (AMOUNT_TEST(pattern_len) && - // strncmp(Np->inout_p, pattern, pattern_len) == 0) - // Np->inout_p += pattern_len - 1; - // else - // continue; - // } - // break; - - // case NUM_L: - // pattern = Np->L_currency_symbol; - // if (Np->is_to_char) - // { - // strcpy(Np->inout_p, pattern); - // Np->inout_p += strlen(pattern) - 1; - // } - // else - // { - // NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len); - // continue; - // } - // break; - - // case NUM_RN: - // if (IS_FILLMODE(Np->Num)) - // { - // strcpy(Np->inout_p, Np->number_p); - // Np->inout_p += strlen(Np->inout_p) - 1; - // } - // else - // { - // sprintf(Np->inout_p, "%15s", Np->number_p); - // Np->inout_p += strlen(Np->inout_p) - 1; - // } - // break; - - // case NUM_rn: - // if (IS_FILLMODE(Np->Num)) - // { - // strcpy(Np->inout_p, asc_tolower_z(Np->number_p)); - // Np->inout_p += strlen(Np->inout_p) - 1; - // } - // else - // { - // sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p)); - // Np->inout_p += strlen(Np->inout_p) - 1; - // } - // break; - - // case NUM_th: - // if (IS_ROMAN(Np->Num) || *Np->number == '#' || - // Np->sign == '-' || IS_DECIMAL(Np->Num)) - // continue; - - // if (Np->is_to_char) - // { - // strcpy(Np->inout_p, get_th(Np->number, TH_LOWER)); - // Np->inout_p += 1; - // } - // else - // { - // /* All variants of 'th' occupy 2 characters */ - // NUM_eat_non_data_chars(Np, 2, input_len); - // continue; - // } - // break; - - // case NUM_TH: - // if (IS_ROMAN(Np->Num) || *Np->number == '#' || - // Np->sign == '-' || IS_DECIMAL(Np->Num)) - // continue; - - // if (Np->is_to_char) - // { - // strcpy(Np->inout_p, get_th(Np->number, TH_UPPER)); - // Np->inout_p += 1; - // } - // else - // { - // /* All variants of 'TH' occupy 2 characters */ - // NUM_eat_non_data_chars(Np, 2, input_len); - // continue; - // } - // break; - - // case NUM_MI: - // if (Np->is_to_char) - // { - // if (Np->sign == '-') - // *Np->inout_p = '-'; - // else if (IS_FILLMODE(Np->Num)) - // continue; - // else - // *Np->inout_p = ' '; - // } - // else - // { - // if (*Np->inout_p == '-') - // *Np->number = '-'; - // else - // { - // NUM_eat_non_data_chars(Np, 1, input_len); - // continue; - // } - // } - // break; - - // case NUM_PL: - // if (Np->is_to_char) - // { - // if (Np->sign == '+') - // *Np->inout_p = '+'; - // else if (IS_FILLMODE(Np->Num)) - // continue; - // else - // *Np->inout_p = ' '; - // } - // else - // { - // if (*Np->inout_p == '+') - // *Np->number = '+'; - // else - // { - // NUM_eat_non_data_chars(Np, 1, input_len); - // continue; - // } - // } - // break; - - // case NUM_SG: - // if (Np->is_to_char) - // *Np->inout_p = Np->sign; - // else - // { - // if (*Np->inout_p == '-') - // *Np->number = '-'; - // else if (*Np->inout_p == '+') - // *Np->number = '+'; - // else - // { - // NUM_eat_non_data_chars(Np, 1, input_len); - // continue; - // } - // } - // break; - - // default: - // continue; - // break; - // } - // } - // else - // { - // /* - // * In TO_CHAR, non-pattern characters in the format are copied to - // * the output. In TO_NUMBER, we skip one input character for each - // * non-pattern format character, whether or not it matches the - // * format character. - // */ - // if (Np->is_to_char) - // { - // strcpy(Np->inout_p, n->character); - // Np->inout_p += strlen(Np->inout_p); - // } - // else - // { - // Np->inout_p += pg_mblen(Np->inout_p); - // } - // continue; - // } - // Np->inout_p++; } - // if (Np->is_to_char) - // { - // *Np->inout_p = '\0'; - // return Np->inout; - // } - // else - // { - // if (*(Np->number_p - 1) == '.') - // *(Np->number_p - 1) = '\0'; - // else - // *Np->number_p = '\0'; - - // /* - // * Correction - precision of dec. number - // */ - // Np->Num->post = Np->read_post; - - // #ifdef DEBUG_TO_FROM_CHAR - // elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number); - // #endif - // return Np->number; - // } - np.inout } fn i32_to_char(value: i32, fmt: &str) -> Result { let mut desc = NumDesc::default(); - let nodes = parse_format(fmt, &NUM_KEYWORDS, Some(&mut desc)).unwrap(); + let nodes = parse_format(fmt, &NUM_KEYWORDS, Some(&mut desc))?; let sign = value >= 0; let (numstr, out_pre_spaces) = if desc.flag.contains(NumFlag::Roman) { @@ -1032,10 +706,7 @@ fn i32_to_char(value: i32, fmt: &str) -> Result { (orgnum, 0) } else { let mut orgnum = if desc.flag.contains(NumFlag::Multi) { - todo!(); - // orgnum = DatumGetCString(DirectFunctionCall1(int4out, - // Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi))))); - // desc.pre += desc.multi; + unimplemented!() } else { format!("{}", value.abs()) }; @@ -1089,34 +760,11 @@ mod tests { assert_eq!(" ##.", i32_to_char(123, "99.")?); assert_eq!("-##.", i32_to_char(-123, "99.")?); - // assert_eq!(" ##.#",i32_to_char(123,"99.0")?); - // assert_eq!("-##.#",i32_to_char(-123,"99.0")?); - - Ok(()) - } + assert_eq!(" ##.#", i32_to_char(123, "99.0")?); + assert_eq!("-##.#", i32_to_char(-123, "99.0")?); - fn run_test(num: &str, fmt: &str, sign: bool) { - let mut desc = NumDesc::default(); - let nodes = parse_format(fmt, &NUM_KEYWORDS, Some(&mut desc)).unwrap(); + assert_eq!(" 0012.0", i32_to_char(12, "9990999.9")?); - let numstr = num.to_string(); - - let numstr_pre_len = match numstr.find('.') { - Some(i) => i, - None => numstr.len(), - }; - - let out_pre_spaces = if numstr_pre_len < desc.pre { - desc.pre - numstr_pre_len - } else { - 0 - }; - - let out = num_processor(&nodes, desc, numstr, out_pre_spaces, sign); - - println!("{out:?}") + Ok(()) } } - -// 123 '9990999' 0123 -// 123 '99900999.999' 0123.000 From 9b6b0ac84492cc8022656d028eb55a5ff58b32e8 Mon Sep 17 00:00:00 2001 From: coldWater Date: Tue, 24 Sep 2024 17:03:40 +0800 Subject: [PATCH 4/9] register Signed-off-by: coldWater --- src/common/io/src/number.rs | 57 +++++++++++------------- src/query/functions/src/scalars/other.rs | 32 +++++++++++++ 2 files changed, 57 insertions(+), 32 deletions(-) diff --git a/src/common/io/src/number.rs b/src/common/io/src/number.rs index 48a1232f7cc1..9ec10b03be67 100644 --- a/src/common/io/src/number.rs +++ b/src/common/io/src/number.rs @@ -128,7 +128,7 @@ enum NumFlag { Multi, PlusPost, MinusPost, - EEEE, + Eeee, } #[derive(Debug, Clone, Copy)] @@ -173,7 +173,7 @@ struct NumDesc { impl NumDesc { fn prepare(&mut self, n: &FormatNode) -> std::result::Result<(), &'static str> { if let FormatNode::Action(key) = n { - if self.flag.contains(NumFlag::EEEE) && !matches!(key.id, NumPoz::TkE) { + if self.flag.contains(NumFlag::Eeee) && !matches!(key.id, NumPoz::TkE) { return Err("\"EEEE\" must be the last pattern used"); } @@ -193,7 +193,7 @@ impl NumDesc { } else { self.pre += 1; } - return Ok(()); + Ok(()) } NumPoz::Tk0 => { @@ -341,7 +341,7 @@ impl NumDesc { } NumPoz::TkE => { - if self.flag.contains(NumFlag::EEEE) { + if self.flag.contains(NumFlag::Eeee) { return Err("cannot use \"EEEE\" twice"); } @@ -360,7 +360,7 @@ impl NumDesc { ); } - self.flag.insert(NumFlag::EEEE); + self.flag.insert(NumFlag::Eeee); Ok(()) } @@ -484,35 +484,28 @@ impl NumProc { } else { // Write Decimal point if self.number.get(self.number_p).is_some_and(|c| *c == '.') { - if !self.last_relevant_is_dot() { - self.inout.push_str(&self.decimal) /* Write DEC/D */ - } - // Ora 'n' -- FM9.9 --> 'n.' - else if self.desc.flag.contains(NumFlag::FillMode) - && self.last_relevant_is_dot() + if !self.last_relevant_is_dot() + || self.desc.flag.contains(NumFlag::FillMode) && self.last_relevant_is_dot() + // Ora 'n' -- FM9.9 --> 'n.'s { self.inout.push_str(&self.decimal) /* Write DEC/D */ } - } else { - if self.last_relevant.is_some_and(|(_, i)| self.number_p > i) - && !matches!(id, NumPoz::Tk0) - { + } else if self.last_relevant.is_some_and(|(_, i)| self.number_p > i) + && !matches!(id, NumPoz::Tk0) + { + } + // '0.1' -- 9.9 --> ' .1' + else if self.is_predec_space() { + if self.desc.flag.contains(NumFlag::FillMode) { + self.inout.push(' '); } - // '0.1' -- 9.9 --> ' .1' - else if self.is_predec_space() { - if self.desc.flag.contains(NumFlag::FillMode) { - self.inout.push(' '); - } - // '0' -- FM9.9 --> '0.' - else if self.last_relevant_is_dot() { - self.inout.push('0') - } - } else { - if self.number_p < self.number.len() { - self.inout.push(self.number[self.number_p]); /* Write DIGIT */ - self.num_in = true - } + // '0' -- FM9.9 --> '0.' + else if self.last_relevant_is_dot() { + self.inout.push('0') } + } else if self.number_p < self.number.len() { + self.inout.push(self.number[self.number_p]); /* Write DIGIT */ + self.num_in = true } if self.number_p < self.number.len() { self.number_p += 1; @@ -567,7 +560,7 @@ impl NumProc { _ => {} } } - self.last_relevant = n.map(|n| (*self.number.iter().nth(n).unwrap(), n)); + self.last_relevant = n.map(|n| (*self.number.get(n).unwrap(), n)); } fn last_relevant_is_dot(&self) -> bool { @@ -689,14 +682,14 @@ fn num_processor( np.inout } -fn i32_to_char(value: i32, fmt: &str) -> Result { +pub fn i32_to_char(value: i32, fmt: &str) -> Result { let mut desc = NumDesc::default(); let nodes = parse_format(fmt, &NUM_KEYWORDS, Some(&mut desc))?; let sign = value >= 0; let (numstr, out_pre_spaces) = if desc.flag.contains(NumFlag::Roman) { unimplemented!() - } else if desc.flag.contains(NumFlag::EEEE) { + } else if desc.flag.contains(NumFlag::Eeee) { // we can do it easily because f32 won't lose any precision let orgnum = format!("{:+.*e}", desc.post, value as f32); diff --git a/src/query/functions/src/scalars/other.rs b/src/query/functions/src/scalars/other.rs index 9adaf5529775..81605649fed5 100644 --- a/src/query/functions/src/scalars/other.rs +++ b/src/query/functions/src/scalars/other.rs @@ -34,6 +34,7 @@ use databend_common_expression::types::ArgType; use databend_common_expression::types::DataType; use databend_common_expression::types::DateType; use databend_common_expression::types::GenericType; +use databend_common_expression::types::Int32Type; use databend_common_expression::types::NullType; use databend_common_expression::types::NullableType; use databend_common_expression::types::NumberColumn; @@ -45,6 +46,7 @@ use databend_common_expression::types::StringType; use databend_common_expression::types::TimestampType; use databend_common_expression::types::ValueType; use databend_common_expression::vectorize_with_builder_1_arg; +use databend_common_expression::vectorize_with_builder_2_arg; use databend_common_expression::Column; use databend_common_expression::Domain; use databend_common_expression::EvalContext; @@ -58,6 +60,7 @@ use databend_common_expression::Scalar; use databend_common_expression::ScalarRef; use databend_common_expression::Value; use databend_common_expression::ValueRef; +use databend_common_io::number::i32_to_char; use rand::Rng; use rand::SeedableRng; @@ -73,6 +76,7 @@ pub fn register(registry: &mut FunctionRegistry) { register_inet_ntoa(registry); register_run_diff(registry); register_grouping(registry); + register_num_to_char(registry); registry.properties.insert( "rand".to_string(), @@ -384,6 +388,34 @@ fn register_grouping(registry: &mut FunctionRegistry) { }) } +fn register_num_to_char(registry: &mut FunctionRegistry) { + registry.register_passthrough_nullable_2_arg::( + "to_char", + |_, _, _| FunctionDomain::MayThrow, + vectorize_with_builder_2_arg::( + |value, fmt, builder, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(builder.len()) { + builder.commit_row(); + return; + } + } + + match i32_to_char(value, fmt) { + Ok(s) => { + builder.put_str(&s); + builder.commit_row() + } + Err(e) => { + ctx.set_error(builder.len(), e.to_string()); + builder.commit_row() + } + } + }, + ), + ) +} + /// Compute `grouping` by `grouping_id` and `cols`. /// /// `cols` are indices of the column represented in `_grouping_id`. From 930cf960366b368daf9bad615a224e683092c79c Mon Sep 17 00:00:00 2001 From: coldWater Date: Tue, 24 Sep 2024 17:23:35 +0800 Subject: [PATCH 5/9] test Signed-off-by: coldWater --- .typos.toml | 1 + .../functions/tests/it/scalars/testdata/function_list.txt | 2 ++ .../suites/query/functions/02_0078_function_to_char.test | 4 ++++ 3 files changed, 7 insertions(+) create mode 100644 tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test diff --git a/.typos.toml b/.typos.toml index 6cf9781affdc..d6c6fcd09e1d 100644 --- a/.typos.toml +++ b/.typos.toml @@ -12,6 +12,7 @@ "ser" = "ser" "Ser" = "Ser" "flate" = "flate" +"Tke" = "Tke" [files] extend-exclude = [ diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 862816ad1205..06620c824efa 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -3726,6 +3726,8 @@ Functions overloads: 21 to_boolean(Float32 NULL) :: Boolean NULL 22 to_boolean(Float64) :: Boolean 23 to_boolean(Float64 NULL) :: Boolean NULL +0 to_char(Int32, String) :: String +1 to_char(Int32 NULL, String NULL) :: String NULL 0 to_date(Variant) :: Date 1 to_date(Variant NULL) :: Date NULL 2 to_date(String, String) :: Date NULL diff --git a/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test b/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test new file mode 100644 index 000000000000..192a4ac20697 --- /dev/null +++ b/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test @@ -0,0 +1,4 @@ +query T +select to_char(123,'0099'); +---- + 0123 \ No newline at end of file From a29d4ddba9c7083a6f0e46552a19a807a1f79c80 Mon Sep 17 00:00:00 2001 From: coldWater Date: Wed, 25 Sep 2024 20:10:25 +0800 Subject: [PATCH 6/9] f64_to_num_part Signed-off-by: coldWater --- Cargo.lock | 1 + src/common/io/Cargo.toml | 1 + src/common/io/src/number.rs | 395 ++++++++++++++---- src/query/functions/src/scalars/other.rs | 36 +- .../it/scalars/testdata/function_list.txt | 6 +- 5 files changed, 354 insertions(+), 85 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 184b9f084683..1aed75fd3f8e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3656,6 +3656,7 @@ dependencies = [ "chrono-tz 0.8.6", "databend-common-base", "databend-common-exception", + "enquote", "enumflags2", "ethnum", "geo", diff --git a/src/common/io/Cargo.toml b/src/common/io/Cargo.toml index 88f98e6e3565..b32564997b6b 100644 --- a/src/common/io/Cargo.toml +++ b/src/common/io/Cargo.toml @@ -18,6 +18,7 @@ chrono = { workspace = true } chrono-tz = { workspace = true } databend-common-base = { workspace = true } databend-common-exception = { workspace = true } +enquote = "1.1.0" enumflags2 = { workspace = true } ethnum = { workspace = true } geo = { workspace = true } diff --git a/src/common/io/src/number.rs b/src/common/io/src/number.rs index 9ec10b03be67..be00a0ab6896 100644 --- a/src/common/io/src/number.rs +++ b/src/common/io/src/number.rs @@ -149,7 +149,7 @@ impl KeyWord { enum FormatNode { End, Action(KeyWord), - Char(Vec), + Char(String), Separator, Space, } @@ -293,6 +293,7 @@ impl NumDesc { } Ok(()) } + NumPoz::TkPL => { if self.flag.contains(NumFlag::LSign) { return Err("cannot use \"S\" and \"PL\" together"); @@ -304,6 +305,7 @@ impl NumDesc { } Ok(()) } + NumPoz::TkSG => { if self.flag.contains(NumFlag::LSign) { return Err("cannot use \"S\" and \"SG\" together"); @@ -311,6 +313,7 @@ impl NumDesc { self.flag.insert(NumFlag::Plus | NumFlag::Minus); Ok(()) } + NumPoz::TkPR => { if self .flag @@ -322,6 +325,7 @@ impl NumDesc { self.flag.insert(NumFlag::Bracket); Ok(()) } + NumPoz::Tkrn | NumPoz::TkRN => { self.flag.insert(NumFlag::Roman); Ok(()) @@ -355,21 +359,146 @@ impl NumDesc { | NumFlag::Roman | NumFlag::Multi, ) { - return Err( - "\"EEEE\" may only be used together with digit and decimal point patterns.", - ); + return Err("\"EEEE\" is incompatible with other formats"); } self.flag.insert(NumFlag::Eeee); Ok(()) } + NumPoz::TkComma => Ok(()), + _ => unreachable!(), } } else { unreachable!() } } + + fn i64_to_num_part(&self, value: i64) -> Result { + if self.flag.contains(NumFlag::Roman) { + return Err(ErrorCode::Unimplemented("to_char RN (Roman numeral)")); + } + + if self.flag.contains(NumFlag::Eeee) { + // we can do it easily because f64 won't lose any precision + let number = format!("{:+.*e}", self.post, value as f64); + + // Swap a leading positive sign for a space. + let number = number.replace("+", " "); + + return Ok(NumPart { + sign: value >= 0, + number, + out_pre_spaces: 0, + }); + } + + if self.flag.contains(NumFlag::Multi) { + return Err(ErrorCode::Unimplemented("to_char V (multiplies)")); + } + + let mut orgnum = if value == i64::MIN { + format!("{}", -(i64::MIN as i128)) + } else { + format!("{}", value.abs()) + }; + + let numstr_pre_len = orgnum.len(); + + // post-decimal digits? Pad out with zeros. + if self.post > 0 { + orgnum.push('.'); + orgnum.push_str(&"0".repeat(self.post)) + } + + let (number, out_pre_spaces) = match numstr_pre_len.cmp(&self.pre) { + // needs padding? + std::cmp::Ordering::Less => (orgnum, self.pre - numstr_pre_len), + // overflowed prefix digit format? + std::cmp::Ordering::Greater => { + (["#".repeat(self.pre), "#".repeat(self.post)].join("."), 0) + } + std::cmp::Ordering::Equal => (orgnum, 0), + }; + + Ok(NumPart { + sign: value >= 0, + number, + out_pre_spaces, + }) + } + + fn f64_to_num_part(&mut self, value: f64) -> Result { + if self.flag.contains(NumFlag::Roman) { + return Err(ErrorCode::Unimplemented("to_char RN (Roman numeral)")); + } + + if self.flag.contains(NumFlag::Eeee) { + let number = if value.is_normal() { + let orgnum = format!("{:+.*e}", self.post, value); + // Swap a leading positive sign for a space. + orgnum.replace("+", " ") + } else { + // Allow 6 characters for the leading sign, the decimal point, + // "e", the exponent's sign and two exponent digits. + let mut orgnum = String::with_capacity(self.pre + self.post + 6); + orgnum.push(' '); + orgnum.push_str(&"#".repeat(self.pre)); + orgnum.push('.'); + orgnum.push_str(&"#".repeat(self.post + 4)); + orgnum + }; + return Ok(NumPart { + sign: !value.is_sign_negative(), + number, + out_pre_spaces: 0, + }); + } + + if self.flag.contains(NumFlag::Multi) { + return Err(ErrorCode::Unimplemented("to_char V (multiplies)")); + } + + let orgnum = format!("{:.0}", value.abs()); + let numstr_pre_len = orgnum.len(); + + const FLT_DIG: usize = 6; + // adjust post digits to fit max float digits + if numstr_pre_len >= FLT_DIG { + self.post = 0; + } else if numstr_pre_len + self.post > FLT_DIG { + self.post = FLT_DIG - numstr_pre_len; + } + let orgnum = format!("{:.*}", self.post, value.abs()); + + let numstr_pre_len = match orgnum.find('.') { + Some(p) => p, + None => orgnum.len(), + }; + + let (number, out_pre_spaces) = match numstr_pre_len.cmp(&self.pre) { + // needs padding? + std::cmp::Ordering::Less => (orgnum, self.pre - numstr_pre_len), + // overflowed prefix digit format? + std::cmp::Ordering::Greater => { + (["#".repeat(self.pre), "#".repeat(self.post)].join("."), 0) + } + std::cmp::Ordering::Equal => (orgnum, 0), + }; + + Ok(NumPart { + sign: !value.is_sign_negative(), + number, + out_pre_spaces, + }) + } +} + +struct NumPart { + sign: bool, + number: String, + out_pre_spaces: usize, } fn parse_format( @@ -379,25 +508,58 @@ fn parse_format( ) -> Result> { let mut nodes = Vec::new(); while !str.is_empty() { - match kw.iter().find(|k| str.starts_with(k.name)) { - Some(k) => { - let n = FormatNode::Action(k.clone()); + if let Some(remain) = str.strip_prefix(' ') { + str = remain; + nodes.push(FormatNode::Space); + continue; + } - if let Some(num) = num.as_mut() { - num.prepare(&n).map_err(ErrorCode::SyntaxException)?; - } - str = &str[k.name.len()..]; + if str.starts_with('"') { + let (offset, literal) = + parse_literal_string(str).map_err(|e| ErrorCode::SyntaxException(e.to_string()))?; + nodes.push(FormatNode::Char(literal)); + str = &str[offset..]; + continue; + } + + if let Some(k) = kw.iter().find(|k| str.starts_with(k.name)) { + let n = FormatNode::Action(k.clone()); - nodes.push(n) + if let Some(num) = num.as_mut() { + num.prepare(&n).map_err(ErrorCode::SyntaxException)?; } - None => Err(ErrorCode::SyntaxException( - "Currently only key words are supported".to_string(), - ))?, + str = &str[k.name.len()..]; + + nodes.push(n); + continue; } + + Err(ErrorCode::SyntaxException( + "Currently only key words are supported".to_string(), + ))?; } Ok(nodes) } +fn parse_literal_string(data: &str) -> std::result::Result<(usize, String), enquote::Error> { + let mut escape = false; + for (i, ch) in data.char_indices() { + if i == 0 { + continue; + } + match ch { + '"' if !escape => { + let end = i + 1; + return enquote::unquote(&data[..end]).map(|s| (end, s)); + } + '\\' if !escape => escape = true, + _ if escape => escape = false, + _ => {} + } + } + Err(enquote::Error::UnexpectedEOF) +} + struct NumProc { desc: NumDesc, // number description @@ -496,7 +658,7 @@ impl NumProc { } // '0.1' -- 9.9 --> ' .1' else if self.is_predec_space() { - if self.desc.flag.contains(NumFlag::FillMode) { + if !self.desc.flag.contains(NumFlag::FillMode) { self.inout.push(' '); } // '0' -- FM9.9 --> '0.' @@ -568,13 +730,12 @@ impl NumProc { } } -fn num_processor( - nodes: &[FormatNode], - desc: NumDesc, - number: String, - out_pre_spaces: usize, - sign: bool, -) -> String { +fn num_processor(nodes: &[FormatNode], desc: NumDesc, num_part: NumPart) -> Result { + let NumPart { + sign, + number, + out_pre_spaces, + } = num_part; let mut np = NumProc { desc, sign, @@ -601,6 +762,10 @@ fn num_processor( np.desc.zero_start -= 1; } + if np.desc.flag.contains(NumFlag::Eeee) { + return Ok(String::from_iter(np.number.iter())); + } + // Roman correction if np.desc.flag.contains(NumFlag::Roman) { unimplemented!() @@ -662,7 +827,10 @@ fn num_processor( } // Locale - // NUM_prepare_locale(Np); + if np.desc.need_locale { + // NUM_prepare_locale(Np); + return Err(ErrorCode::Unimplemented("to_char uses locale S/L/D/G")); + } // Processor direct cycle for n in nodes.iter() { @@ -672,58 +840,51 @@ fn num_processor( id @ (NumPoz::Tk9 | NumPoz::Tk0 | NumPoz::TkDec | NumPoz::TkD) => { np.numpart_to_char(id) } + NumPoz::TkComma => { + if np.num_in { + np.inout.push(',') + } else if np.desc.flag.contains(NumFlag::FillMode) { + continue; + } else { + np.inout.push(' ') + } + } + NumPoz::TkPR => (), + NumPoz::TkFM => (), _ => unimplemented!(), }, FormatNode::End => break, + FormatNode::Char(character) => { + // In TO_CHAR, non-pattern characters in the format are copied to + // the output. + np.inout.push_str(character) + } + FormatNode::Space => np.inout.push(' '), _ => unimplemented!(), } } - np.inout + Ok(np.inout) } -pub fn i32_to_char(value: i32, fmt: &str) -> Result { +pub fn i64_to_char(value: i64, fmt: &str) -> Result { + // TODO: We should cache FormatNode let mut desc = NumDesc::default(); let nodes = parse_format(fmt, &NUM_KEYWORDS, Some(&mut desc))?; - let sign = value >= 0; - let (numstr, out_pre_spaces) = if desc.flag.contains(NumFlag::Roman) { - unimplemented!() - } else if desc.flag.contains(NumFlag::Eeee) { - // we can do it easily because f32 won't lose any precision - let orgnum = format!("{:+.*e}", desc.post, value as f32); - - // Swap a leading positive sign for a space. - let orgnum = orgnum.replace("+", "_"); + let num_part = desc.i64_to_num_part(value)?; - (orgnum, 0) - } else { - let mut orgnum = if desc.flag.contains(NumFlag::Multi) { - unimplemented!() - } else { - format!("{}", value.abs()) - }; - - let numstr_pre_len = orgnum.len(); + num_processor(&nodes, desc, num_part) +} - // post-decimal digits? Pad out with zeros. - if desc.post > 0 { - orgnum.push('.'); - orgnum.push_str(&"0".repeat(desc.post)) - } +pub fn f64_to_char(value: f64, fmt: &str) -> Result { + // TODO: We should cache FormatNode + let mut desc = NumDesc::default(); + let nodes = parse_format(fmt, &NUM_KEYWORDS, Some(&mut desc))?; - match numstr_pre_len.cmp(&desc.pre) { - // needs padding? - std::cmp::Ordering::Less => (orgnum, desc.pre - numstr_pre_len), - std::cmp::Ordering::Equal => (orgnum, 0), - std::cmp::Ordering::Greater => { - // overflowed prefix digit format? - (["#".repeat(desc.pre), "#".repeat(desc.post)].join("."), 0) - } - } - }; + let num_part = desc.f64_to_num_part(value)?; - Ok(num_processor(&nodes, desc, numstr, out_pre_spaces, sign)) + num_processor(&nodes, desc, num_part) } #[cfg(test)] @@ -731,32 +892,110 @@ mod tests { use super::*; #[test] - fn test_i32() -> Result<()> { - assert_eq!(" 123", i32_to_char(123, "999")?); - assert_eq!("-123", i32_to_char(-123, "999")?); + fn test_i64() -> Result<()> { + assert_eq!(" 123", i64_to_char(123, "999")?); + assert_eq!("-123", i64_to_char(-123, "999")?); + + assert_eq!(" 0123", i64_to_char(123, "0999")?); + assert_eq!("-0123", i64_to_char(-123, "0999")?); + + assert_eq!(" 123", i64_to_char(123, "99999")?); + assert_eq!(" -123", i64_to_char(-123, "99999")?); + + assert_eq!(" 0123", i64_to_char(123, "9990999")?); + assert_eq!(" -0123", i64_to_char(-123, "9990999")?); - assert_eq!(" 0123", i32_to_char(123, "0999")?); - assert_eq!("-0123", i32_to_char(-123, "0999")?); + assert_eq!(" 0123 ", i64_to_char(123, "9990999PR")?); + assert_eq!(" <0123>", i64_to_char(-123, "9990999PR")?); - assert_eq!(" 123", i32_to_char(123, "99999")?); - assert_eq!(" -123", i32_to_char(-123, "99999")?); + assert_eq!(" 12345", i64_to_char(12345, "9990999")?); + assert_eq!(" -12345", i64_to_char(-12345, "9990999")?); - assert_eq!(" 0123", i32_to_char(123, "9990999")?); - assert_eq!(" -0123", i32_to_char(-123, "9990999")?); + assert_eq!(" 0012.0", i64_to_char(12, "9990999.9")?); + assert_eq!(" -0012.0", i64_to_char(-12, "9990999.9")?); + assert_eq!("0012.", i64_to_char(12, "FM9990999.9")?); + assert_eq!("-0012.", i64_to_char(-12, "FM9990999.9")?); - assert_eq!(" 12345", i32_to_char(12345, "9990999")?); - assert_eq!(" -12345", i32_to_char(-12345, "9990999")?); + assert_eq!(" ##", i64_to_char(123, "99")?); + assert_eq!("-##", i64_to_char(-123, "99")?); - assert_eq!(" ##", i32_to_char(123, "99")?); - assert_eq!("-##", i32_to_char(-123, "99")?); + assert_eq!(" ##.", i64_to_char(123, "99.")?); + assert_eq!("-##.", i64_to_char(-123, "99.")?); - assert_eq!(" ##.", i32_to_char(123, "99.")?); - assert_eq!("-##.", i32_to_char(-123, "99.")?); + assert_eq!(" ##.#", i64_to_char(123, "99.0")?); + assert_eq!("-##.#", i64_to_char(-123, "99.0")?); - assert_eq!(" ##.#", i32_to_char(123, "99.0")?); - assert_eq!("-##.#", i32_to_char(-123, "99.0")?); + assert_eq!( + " 9223372036854775807", + i64_to_char(i64::MAX, "99999999999999999999")? + ); + assert_eq!( + " -9223372036854775808", + i64_to_char(i64::MIN, "99999999999999999999")? + ); + assert_eq!( + " -9223372036854775807", + i64_to_char(i64::MIN + 1, "99999999999999999999")? + ); - assert_eq!(" 0012.0", i32_to_char(12, "9990999.9")?); + // Regarding the way the exponent part of the scientific notation is formatted, + // there is a slight difference between the rust implementation and the c implementation. + // 1.23456000e+05 + assert_eq!(" 1.23456000e5", i64_to_char(123456, "9.99999999EEEE")?); + assert_eq!("-1.23456e5", i64_to_char(-123456, "9.99999EEEE")?); + + assert_eq!(" 4 8 5", i64_to_char(485, "9 9 9")?); + assert_eq!(" 1,485", i64_to_char(1485, "9,999")?); + // assert_eq!(" 1 485", i64_to_char(1485, "9G999")?); + + assert_eq!("Good number: 485", i64_to_char(485, "\"Good number:\"999")?); + + Ok(()) + } + + #[test] + fn test_f64() -> Result<()> { + assert_eq!(" 12.34", f64_to_char(12.34, "99.99")?); + assert_eq!("-12.34", f64_to_char(-12.34, "99.99")?); + assert_eq!(" .10", f64_to_char(0.1, "99.99")?); + assert_eq!(" -.10", f64_to_char(-0.1, "99.99")?); + + assert_eq!(" 4.86e-4", f64_to_char(0.0004859, "9.99EEEE")?); + assert_eq!("-4.86e-4", f64_to_char(-0.0004859, "9.99EEEE")?); + + assert_eq!(" 0.1", f64_to_char(0.1, "0.9")?); + assert_eq!("-.1", f64_to_char(-0.1, "FM9.99")?); + assert_eq!("-0.1", f64_to_char(-0.1, "FM90.99")?); + + assert_eq!(" 148.500", f64_to_char(148.5, "999.999")?); + assert_eq!("148.5", f64_to_char(148.5, "FM999.999")?); + assert_eq!("148.500", f64_to_char(148.5, "FM999.990")?); + + assert_eq!( + "Pre: 485 Post: .800", + f64_to_char(485.8, "\"Pre:\"999\" Post:\" .999")? + ); + + // assert_eq!(" 148,500", f64_to_char(148.5, "999D999")?); + // assert_eq!(" 3 148,500", f64_to_char(3148.5, "9G999D999")?); + // assert_eq!("485-", f64_to_char(-485, "999S")?); + // assert_eq!("485-", f64_to_char(-485, "999MI")?); + // assert_eq!("485 ", f64_to_char(485, "999MI")?); + // assert_eq!("485", f64_to_char(485, "FM999MI")?); + // assert_eq!("+485", f64_to_char(485, "PL999")?); + // assert_eq!("+485", f64_to_char(485, "SG999")?); + // assert_eq!("-485", f64_to_char(-485, "SG999")?); + // assert_eq!("4-85", f64_to_char(-485, "9SG99")?); + // assert_eq!("<485>", f64_to_char(-485, "999PR")?); + // assert_eq!("DM 485", f64_to_char(485, "L999")?); + // assert_eq!(" CDLXXXV", f64_to_char(485, "RN")?); + // assert_eq!("CDLXXXV", f64_to_char(485, "FMRN")?); + // assert_eq!("V", f64_to_char(5.2, "FMRN")?); + // assert_eq!(" 482nd", f64_to_char(482, "999th")?); + + // assert_eq!(" 12000", f64_to_char(12, "99V999")?); + // assert_eq!(" 12400", f64_to_char(12.4, "99V999")?); + // assert_eq!(" 125", f64_to_char(12.45, "99V9")?); Ok(()) } diff --git a/src/query/functions/src/scalars/other.rs b/src/query/functions/src/scalars/other.rs index 81605649fed5..9d1c98b934e7 100644 --- a/src/query/functions/src/scalars/other.rs +++ b/src/query/functions/src/scalars/other.rs @@ -34,7 +34,6 @@ use databend_common_expression::types::ArgType; use databend_common_expression::types::DataType; use databend_common_expression::types::DateType; use databend_common_expression::types::GenericType; -use databend_common_expression::types::Int32Type; use databend_common_expression::types::NullType; use databend_common_expression::types::NullableType; use databend_common_expression::types::NumberColumn; @@ -60,7 +59,8 @@ use databend_common_expression::Scalar; use databend_common_expression::ScalarRef; use databend_common_expression::Value; use databend_common_expression::ValueRef; -use databend_common_io::number::i32_to_char; +use databend_common_io::number::f64_to_char; +use databend_common_io::number::i64_to_char; use rand::Rng; use rand::SeedableRng; @@ -389,10 +389,10 @@ fn register_grouping(registry: &mut FunctionRegistry) { } fn register_num_to_char(registry: &mut FunctionRegistry) { - registry.register_passthrough_nullable_2_arg::( + registry.register_passthrough_nullable_2_arg::( "to_char", |_, _, _| FunctionDomain::MayThrow, - vectorize_with_builder_2_arg::( + vectorize_with_builder_2_arg::( |value, fmt, builder, ctx| { if let Some(validity) = &ctx.validity { if !validity.get_bit(builder.len()) { @@ -401,7 +401,33 @@ fn register_num_to_char(registry: &mut FunctionRegistry) { } } - match i32_to_char(value, fmt) { + match i64_to_char(value, fmt) { + Ok(s) => { + builder.put_str(&s); + builder.commit_row() + } + Err(e) => { + ctx.set_error(builder.len(), e.to_string()); + builder.commit_row() + } + } + }, + ), + ); + + registry.register_passthrough_nullable_2_arg::( + "to_char", + |_, _, _| FunctionDomain::MayThrow, + vectorize_with_builder_2_arg::( + |value, fmt, builder, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(builder.len()) { + builder.commit_row(); + return; + } + } + + match f64_to_char(*value, fmt) { Ok(s) => { builder.put_str(&s); builder.commit_row() diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 06620c824efa..5f4c60400817 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -3726,8 +3726,10 @@ Functions overloads: 21 to_boolean(Float32 NULL) :: Boolean NULL 22 to_boolean(Float64) :: Boolean 23 to_boolean(Float64 NULL) :: Boolean NULL -0 to_char(Int32, String) :: String -1 to_char(Int32 NULL, String NULL) :: String NULL +0 to_char(Int64, String) :: String +1 to_char(Int64 NULL, String NULL) :: String NULL +2 to_char(Float64, String) :: String +3 to_char(Float64 NULL, String NULL) :: String NULL 0 to_date(Variant) :: Date 1 to_date(Variant NULL) :: Date NULL 2 to_date(String, String) :: Date NULL From 86b831cb1ecc0ecc2b46097c049a3a2b151dff9e Mon Sep 17 00:00:00 2001 From: coldWater Date: Wed, 25 Sep 2024 21:10:05 +0800 Subject: [PATCH 7/9] test Signed-off-by: coldWater --- .../functions/02_0078_function_to_char.test | 171 +++++++++++++++++- 1 file changed, 170 insertions(+), 1 deletion(-) diff --git a/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test b/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test index 192a4ac20697..184a3115ab56 100644 --- a/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test +++ b/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test @@ -1,4 +1,173 @@ +# https://github.com/postgres/postgres/blob/master/src/test/regress/expected/int8.out + +statement ok +DROP TABLE IF EXISTS INT64_TBL; + +statement ok +CREATE TABLE INT64_TBL(q1 int64, q2 int64); + +statement ok +INSERT INTO INT64_TBL VALUES + ('123','456'), + ('123','4567890123456789'), + ('4567890123456789','123'), + (+4567890123456789,'4567890123456789'), + ('+4567890123456789','-4567890123456789'); + +# query T +# SELECT to_char(q1, '9G999G999G999G999G999'), to_char(q2, '9,999,999,999,999,999') FROM INT64_TBL; +#------------------------+------------------------ +# 123 | 456 +# 123 | 4,567,890,123,456,789 +# 4,567,890,123,456,789 | 123 +# 4,567,890,123,456,789 | 4,567,890,123,456,789 +# 4,567,890,123,456,789 | -4,567,890,123,456,789 + +# query T +# SELECT to_char(q1, '9G999G999G999G999G999D999G999'), to_char(q2, '9,999,999,999,999,999.999,999') FROM INT64_TBL; +#--------------------------------+-------------------------------- +# 123.000,000 | 456.000,000 +# 123.000,000 | 4,567,890,123,456,789.000,000 +# 4,567,890,123,456,789.000,000 | 123.000,000 +# 4,567,890,123,456,789.000,000 | 4,567,890,123,456,789.000,000 +# 4,567,890,123,456,789.000,000 | -4,567,890,123,456,789.000,000 + +query T +SELECT to_char( (q1 * -1), '9999999999999999PR'), to_char( (q2 * -1), '9999999999999999.999PR') FROM INT64_TBL; +---- + <123> <456.000> + <123> <4567890123456789.000> + <4567890123456789> <123.000> + <4567890123456789> <4567890123456789.000> + <4567890123456789> 4567890123456789.000 + +# query T +# SELECT to_char( (q1 * -1), '9999999999999999S'), to_char( (q2 * -1), 'S9999999999999999') FROM INT64_TBL; +#-------------------+------------------- +# 123- | -456 +# 123- | -4567890123456789 +# 4567890123456789- | -123 +# 4567890123456789- | -4567890123456789 +# 4567890123456789- | +4567890123456789 + +# query T +# SELECT to_char(q2, 'MI9999999999999999') FROM INT64_TBL; +#------------------- +# 456 +# 4567890123456789 +# 123 +# 4567890123456789 +# -4567890123456789 + +# query T +# SELECT to_char(q2, 'FMS9999999999999999') FROM INT64_TBL; +#------------------- +# +456 +# +4567890123456789 +# +123 +# +4567890123456789 +# -4567890123456789 + +# query T +# SELECT to_char(q2, 'FM9999999999999999THPR') FROM INT64_TBL; +#-------------------- +# 456TH +# 4567890123456789TH +# 123RD +# 4567890123456789TH +# <4567890123456789> + +# query T +# SELECT to_char(q2, 'SG9999999999999999th') FROM INT64_TBL; +#--------------------- +# + 456th +# +4567890123456789th +# + 123rd +# +4567890123456789th +# -4567890123456789 + +query T +SELECT to_char(q2, '0999999999999999') FROM INT64_TBL; +---- + 0000000000000456 + 4567890123456789 + 0000000000000123 + 4567890123456789 + -4567890123456789 + +# query T +# SELECT to_char(q2, 'S0999999999999999') FROM INT64_TBL; +#------------------- +# +0000000000000456 +# +4567890123456789 +# +0000000000000123 +# +4567890123456789 +# -4567890123456789 + + +query T +SELECT to_char(q2, 'FM0999999999999999') FROM INT64_TBL; +---- + 0000000000000456 + 4567890123456789 + 0000000000000123 + 4567890123456789 + -4567890123456789 + +query T +SELECT to_char(q2, 'FM9999999999999999.000') FROM INT64_TBL; +---- + 456.000 + 4567890123456789.000 + 123.000 + 4567890123456789.000 + -4567890123456789.000 + +# query T +# SELECT to_char(q2, 'L9999999999999999.000') FROM INT64_TBL; +#------------------------ +# 456.000 +# 4567890123456789.000 +# 123.000 +# 4567890123456789.000 +# -4567890123456789.000 + +query T +SELECT to_char(q2, 'FM9999999999999999.999') FROM INT64_TBL; +---- + 456. + 4567890123456789. + 123. + 4567890123456789. + -4567890123456789. + + +# SELECT to_char(q2, 'S 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 . 9 9 9') FROM INT64_TBL; +query T +SELECT to_char(q2, '9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 . 9 9 9') FROM INT64_TBL; +---- + 4 5 6 . 0 0 0 + 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 . 0 0 0 + 1 2 3 . 0 0 0 + 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 . 0 0 0 + -4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 . 0 0 0 + +query T +SELECT to_char(q2, '99999 "text" 9999 "9999" 999 "\\"text between quote marks\\"" 9999') FROM INT64_TBL; +---- + text 9999 "text between quote marks" 456 + 45678 text 9012 9999 345 "text between quote marks" 6789 + text 9999 "text between quote marks" 123 + 45678 text 9012 9999 345 "text between quote marks" 6789 + -45678 text 9012 9999 345 "text between quote marks" 6789 + +# query T +# SELECT to_char(q2, '999999SG9999999999') FROM INT64_TBL; + query T select to_char(123,'0099'); ---- - 0123 \ No newline at end of file + 0123 + +statement ok +DROP TABLE IF EXISTS INT64_TBL; From 21a165dfb49eb30a3a64c78092de926a711a9e4e Mon Sep 17 00:00:00 2001 From: coldWater Date: Wed, 25 Sep 2024 23:44:48 +0800 Subject: [PATCH 8/9] SG PL MI Signed-off-by: coldWater --- src/common/io/src/number.rs | 69 ++++++++++++++----- .../functions/02_0078_function_to_char.test | 61 ++++++++-------- 2 files changed, 85 insertions(+), 45 deletions(-) diff --git a/src/common/io/src/number.rs b/src/common/io/src/number.rs index be00a0ab6896..4806a3a2acc7 100644 --- a/src/common/io/src/number.rs +++ b/src/common/io/src/number.rs @@ -774,12 +774,13 @@ fn num_processor(nodes: &[FormatNode], desc: NumDesc, num_part: NumPart) -> Resu // Sign // MI/PL/SG - write sign itself and not in number - if np.desc.flag.contains(NumFlag::Plus | NumFlag::Minus) { - if np.desc.flag.contains(NumFlag::Plus) && !np.desc.flag.contains(NumFlag::Minus) { - np.sign_wrote = false; /* need sign */ - } else { - np.sign_wrote = true; /* needn't sign */ - } + if np.desc.flag.intersects(NumFlag::Plus | NumFlag::Minus) { + // if np.desc.flag.contains(NumFlag::Plus) && !np.desc.flag.contains(NumFlag::Minus) { + // np.sign_wrote = false; /* need sign */ + // } else { + // TODO: Why is this not the same as the postgres implementation? + np.sign_wrote = true; /* needn't sign */ + // } } else { if np.sign && np.desc.flag.contains(NumFlag::FillMode) { np.desc.flag.remove(NumFlag::Bracket) @@ -840,15 +841,37 @@ fn num_processor(nodes: &[FormatNode], desc: NumDesc, num_part: NumPart) -> Resu id @ (NumPoz::Tk9 | NumPoz::Tk0 | NumPoz::TkDec | NumPoz::TkD) => { np.numpart_to_char(id) } + NumPoz::TkComma => { if np.num_in { - np.inout.push(',') - } else if np.desc.flag.contains(NumFlag::FillMode) { + np.inout.push(','); continue; - } else { + } + if !np.desc.flag.contains(NumFlag::FillMode) { np.inout.push(' ') } } + + NumPoz::TkMI => { + if np.sign { + if !np.desc.flag.contains(NumFlag::FillMode) { + np.inout.push(' '); + } + } else { + np.inout.push('-'); + } + } + + NumPoz::TkPL => { + if np.sign { + np.inout.push('+'); + } else if !np.desc.flag.contains(NumFlag::FillMode) { + np.inout.push(' '); + } + } + + NumPoz::TkSG => np.inout.push(if np.sign { '+' } else { '-' }), + NumPoz::TkPR => (), NumPoz::TkFM => (), _ => unimplemented!(), @@ -946,10 +969,25 @@ mod tests { assert_eq!(" 4 8 5", i64_to_char(485, "9 9 9")?); assert_eq!(" 1,485", i64_to_char(1485, "9,999")?); - // assert_eq!(" 1 485", i64_to_char(1485, "9G999")?); assert_eq!("Good number: 485", i64_to_char(485, "\"Good number:\"999")?); + assert_eq!("+485", i64_to_char(485, "SG999")?); + assert_eq!("-485", i64_to_char(-485, "SG999")?); + assert_eq!("4-85", i64_to_char(-485, "9SG99")?); + + assert_eq!("+485", i64_to_char(485, "PL999")?); + assert_eq!(" 485", i64_to_char(-485, "PL999")?); + + assert_eq!("48+5", i64_to_char(485, "99PL9")?); + assert_eq!("48 5", i64_to_char(-485, "99PL9")?); + + assert_eq!("485-", i64_to_char(-485, "999MI")?); + assert_eq!("485 ", i64_to_char(485, "999MI")?); + assert_eq!("485", i64_to_char(485, "FM999MI")?); + + // assert_eq!(" 1 485", i64_to_char(1485, "9G999")?); + Ok(()) } @@ -979,18 +1017,13 @@ mod tests { // assert_eq!(" 148,500", f64_to_char(148.5, "999D999")?); // assert_eq!(" 3 148,500", f64_to_char(3148.5, "9G999D999")?); // assert_eq!("485-", f64_to_char(-485, "999S")?); - // assert_eq!("485-", f64_to_char(-485, "999MI")?); - // assert_eq!("485 ", f64_to_char(485, "999MI")?); - // assert_eq!("485", f64_to_char(485, "FM999MI")?); - // assert_eq!("+485", f64_to_char(485, "PL999")?); - // assert_eq!("+485", f64_to_char(485, "SG999")?); - // assert_eq!("-485", f64_to_char(-485, "SG999")?); - // assert_eq!("4-85", f64_to_char(-485, "9SG99")?); - // assert_eq!("<485>", f64_to_char(-485, "999PR")?); + // assert_eq!("DM 485", f64_to_char(485, "L999")?); + // assert_eq!(" CDLXXXV", f64_to_char(485, "RN")?); // assert_eq!("CDLXXXV", f64_to_char(485, "FMRN")?); // assert_eq!("V", f64_to_char(5.2, "FMRN")?); + // assert_eq!(" 482nd", f64_to_char(482, "999th")?); // assert_eq!(" 12000", f64_to_char(12, "99V999")?); diff --git a/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test b/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test index 184a3115ab56..83546ca057a4 100644 --- a/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test +++ b/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test @@ -23,14 +23,15 @@ INSERT INTO INT64_TBL VALUES # 4,567,890,123,456,789 | 4,567,890,123,456,789 # 4,567,890,123,456,789 | -4,567,890,123,456,789 -# query T # SELECT to_char(q1, '9G999G999G999G999G999D999G999'), to_char(q2, '9,999,999,999,999,999.999,999') FROM INT64_TBL; -#--------------------------------+-------------------------------- -# 123.000,000 | 456.000,000 -# 123.000,000 | 4,567,890,123,456,789.000,000 -# 4,567,890,123,456,789.000,000 | 123.000,000 -# 4,567,890,123,456,789.000,000 | 4,567,890,123,456,789.000,000 -# 4,567,890,123,456,789.000,000 | -4,567,890,123,456,789.000,000 +query T +SELECT to_char(q1, '9,999,999,999,999,999.999,999'), to_char(q2, '9,999,999,999,999,999.999,999') FROM INT64_TBL; +---- + 123.000,000 456.000,000 + 123.000,000 4,567,890,123,456,789.000,000 + 4,567,890,123,456,789.000,000 123.000,000 + 4,567,890,123,456,789.000,000 4,567,890,123,456,789.000,000 + 4,567,890,123,456,789.000,000 -4,567,890,123,456,789.000,000 query T SELECT to_char( (q1 * -1), '9999999999999999PR'), to_char( (q2 * -1), '9999999999999999.999PR') FROM INT64_TBL; @@ -50,23 +51,24 @@ SELECT to_char( (q1 * -1), '9999999999999999PR'), to_char( (q2 * -1), '999999999 # 4567890123456789- | -4567890123456789 # 4567890123456789- | +4567890123456789 -# query T -# SELECT to_char(q2, 'MI9999999999999999') FROM INT64_TBL; -#------------------- -# 456 -# 4567890123456789 -# 123 -# 4567890123456789 -# -4567890123456789 +query T +SELECT to_char(q2, 'MI9999999999999999') FROM INT64_TBL; +---- + 456 + 4567890123456789 + 123 + 4567890123456789 + -4567890123456789 -# query T # SELECT to_char(q2, 'FMS9999999999999999') FROM INT64_TBL; -#------------------- -# +456 -# +4567890123456789 -# +123 -# +4567890123456789 -# -4567890123456789 +query T +SELECT to_char(q2, 'FMSG9999999999999999') FROM INT64_TBL; +---- + +456 + +4567890123456789 + +123 + +4567890123456789 + -4567890123456789 # query T # SELECT to_char(q2, 'FM9999999999999999THPR') FROM INT64_TBL; @@ -96,7 +98,7 @@ SELECT to_char(q2, '0999999999999999') FROM INT64_TBL; -4567890123456789 # query T -# SELECT to_char(q2, 'S0999999999999999') FROM INT64_TBL; +# SELECT to_char(q2, 'S0999999999999999') FROM INT64_TBL; #------------------- # +0000000000000456 # +4567890123456789 @@ -104,9 +106,8 @@ SELECT to_char(q2, '0999999999999999') FROM INT64_TBL; # +4567890123456789 # -4567890123456789 - query T -SELECT to_char(q2, 'FM0999999999999999') FROM INT64_TBL; +SELECT to_char(q2, 'FM0999999999999999') FROM INT64_TBL; ---- 0000000000000456 4567890123456789 @@ -161,8 +162,14 @@ SELECT to_char(q2, '99999 "text" 9999 "9999" 999 "\\"text between quote marks\\" 45678 text 9012 9999 345 "text between quote marks" 6789 -45678 text 9012 9999 345 "text between quote marks" 6789 -# query T -# SELECT to_char(q2, '999999SG9999999999') FROM INT64_TBL; +query T +SELECT to_char(q2, '999999SG9999999999') FROM INT64_TBL; +---- + + 456 + 456789+0123456789 + + 123 + 456789+0123456789 + 456789-0123456789 query T select to_char(123,'0099'); From 705a867d0741b9acca5fb6223326dffc388729f5 Mon Sep 17 00:00:00 2001 From: coldWater Date: Thu, 26 Sep 2024 10:24:03 +0800 Subject: [PATCH 9/9] fix Signed-off-by: coldWater --- .../suites/query/functions/02_0078_function_to_char.test | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test b/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test index 83546ca057a4..b73f95316b44 100644 --- a/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test +++ b/tests/sqllogictests/suites/query/functions/02_0078_function_to_char.test @@ -1,10 +1,7 @@ # https://github.com/postgres/postgres/blob/master/src/test/regress/expected/int8.out statement ok -DROP TABLE IF EXISTS INT64_TBL; - -statement ok -CREATE TABLE INT64_TBL(q1 int64, q2 int64); +CREATE OR REPLACE TABLE INT64_TBL(q1 int64, q2 int64); statement ok INSERT INTO INT64_TBL VALUES