From 64132ed6f2cd844dc083e62837e0b7e11df3b9e8 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Tue, 21 May 2024 20:36:15 +0800 Subject: [PATCH 01/39] feat(parser): the 1st version of a new parser-combinator style parser Signed-off-by: TennyZhuang --- Cargo.lock | 15 ++- src/sqlparser/Cargo.toml | 1 + src/sqlparser/src/lib.rs | 1 + src/sqlparser/src/parser_v2/data_type.rs | 161 +++++++++++++++++++++++ src/sqlparser/src/parser_v2/mod.rs | 142 ++++++++++++++++++++ src/sqlparser/src/parser_v2/number.rs | 38 ++++++ 6 files changed, 355 insertions(+), 3 deletions(-) create mode 100644 src/sqlparser/src/parser_v2/data_type.rs create mode 100644 src/sqlparser/src/parser_v2/mod.rs create mode 100644 src/sqlparser/src/parser_v2/number.rs diff --git a/Cargo.lock b/Cargo.lock index 6e7bd09af8b9..e7ba267f42d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11413,6 +11413,7 @@ dependencies = [ "serde", "tracing", "tracing-subscriber", + "winnow 0.6.8 (git+https://github.com/TennyZhuang/winnow.git?rev=509121449b451f0faa1b698b4e8cb36dfaa1a4b5)", "workspace-hack", ] @@ -14540,7 +14541,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "winnow 0.6.5", + "winnow 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -16220,9 +16221,17 @@ dependencies = [ [[package]] name = "winnow" -version = "0.6.5" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dffa400e67ed5a4dd237983829e66475f0a4a26938c4b04c21baede6262215b8" +checksum = "c3c52e9c97a68071b23e836c9380edae937f17b9c4667bd021973efc689f618d" +dependencies = [ + "memchr", +] + +[[package]] +name = "winnow" +version = "0.6.8" +source = "git+https://github.com/TennyZhuang/winnow.git?rev=509121449b451f0faa1b698b4e8cb36dfaa1a4b5#509121449b451f0faa1b698b4e8cb36dfaa1a4b5" dependencies = [ "memchr", ] diff --git a/src/sqlparser/Cargo.toml b/src/sqlparser/Cargo.toml index ef57da9aa62f..2798d3852402 100644 --- a/src/sqlparser/Cargo.toml +++ b/src/sqlparser/Cargo.toml @@ -29,6 +29,7 @@ itertools = { workspace = true } serde = { version = "1.0", features = ["derive"], optional = true } tracing = "0.1" tracing-subscriber = "0.3" +winnow = { version = "0.6.8", git = "https://github.com/TennyZhuang/winnow.git", rev = "509121449b451f0faa1b698b4e8cb36dfaa1a4b5" } [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../workspace-hack" } diff --git a/src/sqlparser/src/lib.rs b/src/sqlparser/src/lib.rs index 612b11078eac..a102e5428eda 100644 --- a/src/sqlparser/src/lib.rs +++ b/src/sqlparser/src/lib.rs @@ -45,6 +45,7 @@ extern crate alloc; pub mod ast; pub mod keywords; pub mod parser; +pub mod parser_v2; pub mod tokenizer; #[doc(hidden)] diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs new file mode 100644 index 000000000000..496065ca5ed1 --- /dev/null +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -0,0 +1,161 @@ +use core::cell::RefCell; +use std::rc::Rc; + +use winnow::combinator::{alt, delimited, dispatch, empty, fail, opt, separated, seq}; +use winnow::error::{ContextError, ErrMode}; +use winnow::{PResult, Parser, Stateful}; + +use super::{ + identifier_non_reserved, keyword, literal_uint, precision_in_range, token, with_state, + TokenStream, +}; +use crate::ast::{DataType, StructField}; +use crate::keywords::Keyword; +use crate::tokenizer::Token; + +#[derive(Default, Debug)] +struct DataTypeParsingState { + /// When we consumed an [`Token::ShiftRight`], we set this to true. + remaining_close: Rc>, +} + +type StatefulStream = Stateful; + +fn struct_data_type(input: &mut StatefulStream) -> PResult> +where + S: TokenStream, +{ + let remaining_close1 = input.state.remaining_close.clone(); + let remaining_close2 = input.state.remaining_close.clone(); + + let consume_close = alt(( + move |_input: &mut StatefulStream| -> PResult<()> { + if *remaining_close1.borrow() { + Ok(()) + } else { + Err(ErrMode::Backtrack(ContextError::new())) + } + } + .void(), + ( + Token::ShiftRight, + move |_input: &mut StatefulStream| -> PResult<()> { + *remaining_close2.borrow_mut() = true; + Ok(()) + }, + ) + .void(), + Token::Gt.void(), + )); + + delimited( + Token::Lt, + separated( + 1.., + seq! { + StructField { + name: identifier_non_reserved, + _: Token::Colon, + data_type: data_type_stateful, + } + }, + Token::Comma, + ), + consume_close, + ) + .parse_next(input) +} + +pub fn data_type(input: &mut S) -> PResult +where + S: TokenStream, +{ + with_state::(data_type_stateful).parse_next(input) +} + +fn data_type_stateful(input: &mut StatefulStream) -> PResult +where + S: TokenStream, +{ + ( + data_type_stateful_inner, + opt((Token::LBracket, Token::RBracket)), + ) + .map(|(dt, is_array)| { + if is_array.is_some() { + DataType::Array(Box::new(dt)) + } else { + dt + } + }) + .parse_next(input) +} + +fn data_type_stateful_inner(input: &mut StatefulStream) -> PResult +where + S: TokenStream, +{ + let with_time_zone = || { + opt(alt(( + (Keyword::WITH, Keyword::TIME, Keyword::ZONE).value(true), + (Keyword::WITHOUT, Keyword::TIME, Keyword::ZONE).value(false), + ))) + .map(|x| x.unwrap_or(false)) + }; + + let precision_and_scale = || { + opt(delimited( + Token::LParen, + ( + literal_uint, + opt((Token::Comma, literal_uint).map(|(_, x)| x)), + ), + Token::RParen, + )) + .map(|p| match p { + Some((x, y)) => (Some(x), y), + None => (None, None), + }) + }; + + let keywords = dispatch! {keyword; + Keyword::BOOLEAN | Keyword::BOOL => empty.value(DataType::Boolean), + Keyword::FLOAT => opt(precision_in_range(1..53)).map(|precision| DataType::Float(precision)), + Keyword::REAL => empty.value(DataType::Real), + Keyword::DOUBLE => Keyword::PRECISION.value(DataType::Double), + Keyword::SMALLINT => empty.value(DataType::SmallInt), + Keyword::INT | Keyword::INTEGER => empty.value(DataType::Int), + Keyword::BIGINT => empty.value(DataType::BigInt), + Keyword::STRING | Keyword::VARCHAR => empty.value(DataType::Varchar), + Keyword::CHAR | Keyword::CHARACTER => dispatch! {keyword; + Keyword::VARYING => empty.value(DataType::Varchar), + _ => opt(precision_in_range(..)).map(|precision| DataType::Char(precision)), + }, + Keyword::UUID => empty.value(DataType::Uuid), + Keyword::DATE => empty.value(DataType::Date), + Keyword::TIMESTAMP => with_time_zone().map(|with_tz| DataType::Timestamp(with_tz)), + Keyword::TIME => with_time_zone().map(|with_tz| DataType::Time(with_tz)), + // TODO: Support complex inverval type parsing. + Keyword::INTERVAL => empty.value(DataType::Interval), + Keyword::REGCLASS => empty.value(DataType::Regclass), + Keyword::REGPROC => empty.value(DataType::Regproc), + Keyword::STRUCT => struct_data_type.map(DataType::Struct), + Keyword::BYTEA => empty.value(DataType::Bytea), + Keyword::NUMERIC | Keyword::DECIMAL | Keyword::DEC => precision_and_scale().map(|(precision, scale)| { + DataType::Decimal(precision, scale) + }), + _ => fail::<_, DataType, _>, + }; + + alt(( + keywords, + // JSONB is not a keyword, but a special data type. + token + .verify(|t| match &t.token { + Token::Word(w) if w.value.eq_ignore_ascii_case("jsonb") => true, + _ => false, + }) + .value(DataType::Jsonb), + )) + .parse_next(input) +} diff --git a/src/sqlparser/src/parser_v2/mod.rs b/src/sqlparser/src/parser_v2/mod.rs new file mode 100644 index 000000000000..642669e2aa87 --- /dev/null +++ b/src/sqlparser/src/parser_v2/mod.rs @@ -0,0 +1,142 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use winnow::error::ContextError; +use winnow::stream::{Location, Stream, StreamIsPartial}; +use winnow::token::any; +use winnow::{PResult, Parser, Stateful}; + +use crate::ast::Ident; +use crate::keywords::{self, Keyword}; +use crate::tokenizer::{Token, TokenWithLocation}; + +mod data_type; +mod number; + +pub(crate) use data_type::*; +pub(crate) use number::*; + +trait TokenStream: Stream + StreamIsPartial + Location + Default {} + +impl TokenStream for S where + S: Stream + StreamIsPartial + Location + Default +{ +} + +fn token(input: &mut S) -> PResult +where + S: TokenStream, +{ + any(input) +} + +fn keyword(input: &mut S) -> PResult +where + S: TokenStream, +{ + token + .verify_map(|t| match &t.token { + Token::Word(w) if w.keyword != Keyword::NoKeyword => Some(w.keyword), + _ => None, + }) + .parse_next(input) +} + +impl Parser for Token +where + I: TokenStream, +{ + fn parse_next(&mut self, input: &mut I) -> PResult { + token + .verify(move |t: &TokenWithLocation| t.token == *self) + .parse_next(input) + } +} + +impl Parser for Keyword +where + I: TokenStream, +{ + fn parse_next(&mut self, input: &mut I) -> PResult { + token + .verify_map(move |t| match &t.token { + Token::Word(w) if *self == w.keyword => Some(w.keyword), + _ => None, + }) + .parse_next(input) + } +} + +fn identifier_non_reserved(input: &mut S) -> PResult +where + S: TokenStream, +{ + // FIXME: Reporting error correctly. + token + .verify_map(|t| match &t.token { + Token::Word(w) if !keywords::RESERVED_FOR_COLUMN_OR_TABLE_NAME.contains(&w.keyword) => { + w.to_ident().ok() + } + _ => None, + }) + .parse_next(input) +} + +fn with_state(mut parse_next: ParseNext) -> impl Parser +where + S: TokenStream, + State: Default, + ParseNext: Parser, O, ContextError>, +{ + move |input: &mut S| -> PResult { + let state = State::default(); + let input2 = std::mem::take(input); + let mut stateful = Stateful { + input: input2, + state, + }; + let output = parse_next.parse_next(&mut stateful); + *input = stateful.input; + output + } +} + +#[cfg(test)] +mod tests { + use winnow::Located; + + use super::*; + use crate::tokenizer::Tokenizer; + + #[test] + fn test_basic() { + let input = "SELECT 1"; + let tokens = Tokenizer::new(input).tokenize_with_location().unwrap(); + let mut token_stream = Located::new(&*tokens); + Token::make_keyword("SELECT") + .parse_next(&mut token_stream) + .unwrap(); + } + + #[test] + fn test_stateful() { + let input = "SELECT 1"; + let tokens = Tokenizer::new(input).tokenize_with_location().unwrap(); + let mut token_stream = Located::new(&*tokens); + with_state(|input: &mut Stateful<_, usize>| -> PResult<()> { + input.state += 1; + Token::make_keyword("SELECT").void().parse_next(input) + }) + .parse_next(&mut token_stream) + .unwrap(); + } +} diff --git a/src/sqlparser/src/parser_v2/number.rs b/src/sqlparser/src/parser_v2/number.rs new file mode 100644 index 000000000000..50de5e9f7d05 --- /dev/null +++ b/src/sqlparser/src/parser_v2/number.rs @@ -0,0 +1,38 @@ +use core::ops::{Range, RangeBounds}; + +use winnow::combinator::{delimited, opt}; +use winnow::error::ContextError; +use winnow::{PResult, Parser}; + +use super::{token, TokenStream}; +use crate::tokenizer::Token; + +pub fn token_number(input: &mut S) -> PResult +where + S: TokenStream, +{ + token + .verify_map(|t| { + if let Token::Number(number) = t.token { + Some(number) + } else { + None + } + }) + .parse_next(input) +} + +/// Parse an unsigned literal integer/long +pub fn literal_uint(input: &mut S) -> PResult +where + S: TokenStream, +{ + token_number.try_map(|s| s.parse::()).parse_next(input) +} + +pub fn precision_in_range(range: impl RangeBounds) -> impl Parser +where + S: TokenStream, +{ + delimited(Token::LParen, literal_uint, Token::LParen).verify(move |v| range.contains(v)) +} From f68d97e80d43a07f25a755ca1a8eabe53bee967d Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Tue, 21 May 2024 21:04:53 +0800 Subject: [PATCH 02/39] add parse_v2 Signed-off-by: TennyZhuang --- src/sqlparser/src/parser.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index 9b86b098b14b..9bfe5aa26ab3 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -21,6 +21,7 @@ use alloc::{ vec::Vec, }; use core::fmt; +use std::fmt::format; use itertools::Itertools; use tracing::{debug, instrument}; @@ -187,6 +188,29 @@ impl Parser { } } + pub(crate) fn parse_v2<'a, O>( + &'a mut self, + mut parse_next: impl winnow::Parser< + winnow::Located<&'a [TokenWithLocation]>, + O, + winnow::error::ContextError, + >, + ) -> Result { + use winnow::stream::Location; + + let mut token_stream = winnow::Located::new(&*self.tokens); + let output = parse_next.parse_next(&mut token_stream).map_err(|e| { + ParserError::ParserError(format!( + "Error parsing SQL at {}: {}", + token_stream.location(), + e + )) + }); + let offset = token_stream.location(); + self.index += offset; + output + } + /// Parse a SQL statement and produce an Abstract Syntax Tree (AST) #[instrument(level = "debug")] pub fn parse_sql(sql: &str) -> Result, ParserError> { From 12bfadf7b152e820de64f1472b7dfd5813f57e55 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Tue, 21 May 2024 22:51:50 +0800 Subject: [PATCH 03/39] Update src/sqlparser/src/parser_v2/data_type.rs --- src/sqlparser/src/parser_v2/data_type.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 496065ca5ed1..6503c4c90e85 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -135,7 +135,7 @@ where Keyword::DATE => empty.value(DataType::Date), Keyword::TIMESTAMP => with_time_zone().map(|with_tz| DataType::Timestamp(with_tz)), Keyword::TIME => with_time_zone().map(|with_tz| DataType::Time(with_tz)), - // TODO: Support complex inverval type parsing. + // TODO: Support complex interval type parsing. Keyword::INTERVAL => empty.value(DataType::Interval), Keyword::REGCLASS => empty.value(DataType::Regclass), Keyword::REGPROC => empty.value(DataType::Regproc), From b0a0afda73c2355bdbe7c06da11da8ed0b1f2a08 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Wed, 22 May 2024 14:36:07 +0800 Subject: [PATCH 04/39] minor fix Signed-off-by: TennyZhuang --- src/sqlparser/src/parser.rs | 142 +---------------------- src/sqlparser/src/parser_v2/data_type.rs | 17 ++- src/sqlparser/src/parser_v2/mod.rs | 5 +- src/sqlparser/src/parser_v2/number.rs | 4 +- 4 files changed, 17 insertions(+), 151 deletions(-) diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index 9bfe5aa26ab3..c391ee76f1e0 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -21,13 +21,13 @@ use alloc::{ vec::Vec, }; use core::fmt; -use std::fmt::format; use itertools::Itertools; use tracing::{debug, instrument}; use crate::ast::*; use crate::keywords::{self, Keyword}; +use crate::parser_v2; use crate::tokenizer::*; pub(crate) const UPSTREAM_SOURCE_KEY: &str = "connector"; @@ -173,19 +173,12 @@ pub struct Parser { tokens: Vec, /// The index of the first unprocessed token in `self.tokens` index: usize, - /// Since we cannot distinguish `>>` and double `>`, so use `angle_brackets_num` to store the - /// number of `<` to match `>` in sql like `struct>`. - angle_brackets_num: i32, } impl Parser { /// Parse the specified tokens pub fn new(tokens: Vec) -> Self { - Parser { - tokens, - index: 0, - angle_brackets_num: 0, - } + Parser { tokens, index: 0 } } pub(crate) fn parse_v2<'a, O>( @@ -3830,136 +3823,7 @@ impl Parser { /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) and convert /// into an array of that datatype if needed pub fn parse_data_type(&mut self) -> Result { - let mut data_type = self.parse_data_type_inner()?; - while self.consume_token(&Token::LBracket) { - self.expect_token(&Token::RBracket)?; - data_type = DataType::Array(Box::new(data_type)); - } - Ok(data_type) - } - - /// Parse struct `data_type` e.g.`>`. - pub fn parse_struct_data_type(&mut self) -> Result, ParserError> { - let mut columns = vec![]; - if !self.consume_token(&Token::Lt) { - return self.expected("'<' after struct", self.peek_token()); - } - self.angle_brackets_num += 1; - - loop { - if let Token::Word(_) = self.peek_token().token { - let name = self.parse_identifier_non_reserved()?; - let data_type = self.parse_data_type()?; - columns.push(StructField { name, data_type }) - } else { - return self.expected("struct field name", self.peek_token()); - } - if self.angle_brackets_num == 0 { - break; - } else if self.consume_token(&Token::Gt) { - self.angle_brackets_num -= 1; - break; - } else if self.consume_token(&Token::ShiftRight) { - if self.angle_brackets_num >= 1 { - self.angle_brackets_num -= 2; - break; - } else { - return parser_err!("too much '>'"); - } - } else if !self.consume_token(&Token::Comma) { - return self.expected("',' or '>' after column definition", self.peek_token()); - } - } - - Ok(columns) - } - - /// Parse a SQL datatype - pub fn parse_data_type_inner(&mut self) -> Result { - let token = self.next_token(); - match token.token { - Token::Word(w) => match w.keyword { - Keyword::BOOLEAN | Keyword::BOOL => Ok(DataType::Boolean), - Keyword::FLOAT => { - let precision = self.parse_optional_precision()?; - match precision { - Some(0) => Err(ParserError::ParserError( - "precision for type float must be at least 1 bit".to_string(), - )), - Some(54..) => Err(ParserError::ParserError( - "precision for type float must be less than 54 bits".to_string(), - )), - _ => Ok(DataType::Float(precision)), - } - } - Keyword::REAL => Ok(DataType::Real), - Keyword::DOUBLE => { - let _ = self.parse_keyword(Keyword::PRECISION); - Ok(DataType::Double) - } - Keyword::SMALLINT => Ok(DataType::SmallInt), - Keyword::INT | Keyword::INTEGER => Ok(DataType::Int), - Keyword::BIGINT => Ok(DataType::BigInt), - Keyword::STRING | Keyword::VARCHAR => Ok(DataType::Varchar), - Keyword::CHAR | Keyword::CHARACTER => { - if self.parse_keyword(Keyword::VARYING) { - Ok(DataType::Varchar) - } else { - Ok(DataType::Char(self.parse_optional_precision()?)) - } - } - Keyword::UUID => Ok(DataType::Uuid), - Keyword::DATE => Ok(DataType::Date), - Keyword::TIMESTAMP => { - let with_time_zone = self.parse_keyword(Keyword::WITH); - if with_time_zone || self.parse_keyword(Keyword::WITHOUT) { - self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; - } - Ok(DataType::Timestamp(with_time_zone)) - } - Keyword::TIME => { - let with_time_zone = self.parse_keyword(Keyword::WITH); - if with_time_zone || self.parse_keyword(Keyword::WITHOUT) { - self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; - } - Ok(DataType::Time(with_time_zone)) - } - // Interval types can be followed by a complicated interval - // qualifier that we don't currently support. See - // parse_interval_literal for a taste. - Keyword::INTERVAL => Ok(DataType::Interval), - Keyword::REGCLASS => Ok(DataType::Regclass), - Keyword::REGPROC => Ok(DataType::Regproc), - Keyword::TEXT => { - if self.consume_token(&Token::LBracket) { - // Note: this is postgresql-specific - self.expect_token(&Token::RBracket)?; - Ok(DataType::Array(Box::new(DataType::Text))) - } else { - Ok(DataType::Text) - } - } - Keyword::STRUCT => Ok(DataType::Struct(self.parse_struct_data_type()?)), - Keyword::BYTEA => Ok(DataType::Bytea), - Keyword::NUMERIC | Keyword::DECIMAL | Keyword::DEC => { - let (precision, scale) = self.parse_optional_precision_scale()?; - Ok(DataType::Decimal(precision, scale)) - } - _ => { - self.prev_token(); - let type_name = self.parse_object_name()?; - // JSONB is not a keyword - if type_name.to_string().eq_ignore_ascii_case("jsonb") { - Ok(DataType::Jsonb) - } else { - Ok(DataType::Custom(type_name)) - } - } - }, - unexpected => { - self.expected("a data type name", unexpected.with_location(token.location)) - } - } + self.parse_v2(parser_v2::data_type) } /// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 6503c4c90e85..298063dfe795 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -15,7 +15,9 @@ use crate::tokenizer::Token; #[derive(Default, Debug)] struct DataTypeParsingState { - /// When we consumed an [`Token::ShiftRight`], we set this to true. + /// Since we can't distinguish between `>>` and `> >` in lexer, we need to handle this case in the parser. + /// When we want a [`>`][Token::Gt] but actually consumed a [`>>`][Token::ShiftRight], we set this to true. + /// When the value was true and we want a [`>`][Token::Gt], we just set this to false instead of really consume it. remaining_close: Rc>, } @@ -120,7 +122,7 @@ where let keywords = dispatch! {keyword; Keyword::BOOLEAN | Keyword::BOOL => empty.value(DataType::Boolean), - Keyword::FLOAT => opt(precision_in_range(1..53)).map(|precision| DataType::Float(precision)), + Keyword::FLOAT => opt(precision_in_range(1..53)).map(DataType::Float), Keyword::REAL => empty.value(DataType::Real), Keyword::DOUBLE => Keyword::PRECISION.value(DataType::Double), Keyword::SMALLINT => empty.value(DataType::SmallInt), @@ -129,12 +131,12 @@ where Keyword::STRING | Keyword::VARCHAR => empty.value(DataType::Varchar), Keyword::CHAR | Keyword::CHARACTER => dispatch! {keyword; Keyword::VARYING => empty.value(DataType::Varchar), - _ => opt(precision_in_range(..)).map(|precision| DataType::Char(precision)), + _ => opt(precision_in_range(..)).map(DataType::Char), }, Keyword::UUID => empty.value(DataType::Uuid), Keyword::DATE => empty.value(DataType::Date), - Keyword::TIMESTAMP => with_time_zone().map(|with_tz| DataType::Timestamp(with_tz)), - Keyword::TIME => with_time_zone().map(|with_tz| DataType::Time(with_tz)), + Keyword::TIMESTAMP => with_time_zone().map(DataType::Timestamp), + Keyword::TIME => with_time_zone().map(DataType::Time), // TODO: Support complex interval type parsing. Keyword::INTERVAL => empty.value(DataType::Interval), Keyword::REGCLASS => empty.value(DataType::Regclass), @@ -151,10 +153,7 @@ where keywords, // JSONB is not a keyword, but a special data type. token - .verify(|t| match &t.token { - Token::Word(w) if w.value.eq_ignore_ascii_case("jsonb") => true, - _ => false, - }) + .verify(|t| matches!(&t.token, Token::Word(w) if w.value.eq_ignore_ascii_case("jsonb"))) .value(DataType::Jsonb), )) .parse_next(input) diff --git a/src/sqlparser/src/parser_v2/mod.rs b/src/sqlparser/src/parser_v2/mod.rs index 642669e2aa87..7012b9880adc 100644 --- a/src/sqlparser/src/parser_v2/mod.rs +++ b/src/sqlparser/src/parser_v2/mod.rs @@ -25,7 +25,10 @@ mod number; pub(crate) use data_type::*; pub(crate) use number::*; -trait TokenStream: Stream + StreamIsPartial + Location + Default {} +pub trait TokenStream: + Stream + StreamIsPartial + Location + Default +{ +} impl TokenStream for S where S: Stream + StreamIsPartial + Location + Default diff --git a/src/sqlparser/src/parser_v2/number.rs b/src/sqlparser/src/parser_v2/number.rs index 50de5e9f7d05..2efaf00af2f3 100644 --- a/src/sqlparser/src/parser_v2/number.rs +++ b/src/sqlparser/src/parser_v2/number.rs @@ -1,6 +1,6 @@ -use core::ops::{Range, RangeBounds}; +use core::ops::RangeBounds; -use winnow::combinator::{delimited, opt}; +use winnow::combinator::delimited; use winnow::error::ContextError; use winnow::{PResult, Parser}; From 8ea355a209f30ed603ae8c404c48ed0873d292d1 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Wed, 22 May 2024 14:38:44 +0800 Subject: [PATCH 05/39] minor improvement Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 298063dfe795..9d1a9ccef1eb 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -31,11 +31,11 @@ where let remaining_close2 = input.state.remaining_close.clone(); let consume_close = alt(( - move |_input: &mut StatefulStream| -> PResult<()> { + move |input: &mut StatefulStream| -> PResult<()> { if *remaining_close1.borrow() { Ok(()) } else { - Err(ErrMode::Backtrack(ContextError::new())) + fail(input) } } .void(), @@ -146,7 +146,7 @@ where Keyword::NUMERIC | Keyword::DECIMAL | Keyword::DEC => precision_and_scale().map(|(precision, scale)| { DataType::Decimal(precision, scale) }), - _ => fail::<_, DataType, _>, + _ => fail, }; alt(( From 332234b4d751cd01058dd2e6fa8909244de5b4e5 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Wed, 22 May 2024 14:43:37 +0800 Subject: [PATCH 06/39] minor fix Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 9d1a9ccef1eb..0c0a710ac4b5 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -2,7 +2,6 @@ use core::cell::RefCell; use std::rc::Rc; use winnow::combinator::{alt, delimited, dispatch, empty, fail, opt, separated, seq}; -use winnow::error::{ContextError, ErrMode}; use winnow::{PResult, Parser, Stateful}; use super::{ @@ -15,7 +14,7 @@ use crate::tokenizer::Token; #[derive(Default, Debug)] struct DataTypeParsingState { - /// Since we can't distinguish between `>>` and `> >` in lexer, we need to handle this case in the parser. + /// Since we can't distinguish between `>>` and `> >` in tokenizer, we need to handle this case in the parser. /// When we want a [`>`][Token::Gt] but actually consumed a [`>>`][Token::ShiftRight], we set this to true. /// When the value was true and we want a [`>`][Token::Gt], we just set this to false instead of really consume it. remaining_close: Rc>, From 2ccd163683c955cf54fd1ace98e9e9fac79c4d08 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Wed, 22 May 2024 15:39:41 +0800 Subject: [PATCH 07/39] add some label Signed-off-by: TennyZhuang --- src/sqlparser/src/parser.rs | 4 ++-- src/sqlparser/src/parser_v2/data_type.rs | 6 +++++- src/sqlparser/src/parser_v2/mod.rs | 3 ++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index c391ee76f1e0..47029a6b1564 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -194,8 +194,8 @@ impl Parser { let mut token_stream = winnow::Located::new(&*self.tokens); let output = parse_next.parse_next(&mut token_stream).map_err(|e| { ParserError::ParserError(format!( - "Error parsing SQL at {}: {}", - token_stream.location(), + "Unexpected {}: {}", + self.tokens[self.index + token_stream.location()], e )) }); diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 0c0a710ac4b5..f50058ee4b49 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -2,6 +2,7 @@ use core::cell::RefCell; use std::rc::Rc; use winnow::combinator::{alt, delimited, dispatch, empty, fail, opt, separated, seq}; +use winnow::error::StrContext; use winnow::{PResult, Parser, Stateful}; use super::{ @@ -71,7 +72,9 @@ pub fn data_type(input: &mut S) -> PResult where S: TokenStream, { - with_state::(data_type_stateful).parse_next(input) + with_state::(data_type_stateful) + .context(StrContext::Label("data_type")) + .parse_next(input) } fn data_type_stateful(input: &mut StatefulStream) -> PResult @@ -155,5 +158,6 @@ where .verify(|t| matches!(&t.token, Token::Word(w) if w.value.eq_ignore_ascii_case("jsonb"))) .value(DataType::Jsonb), )) + .context(StrContext::Label("data_type_inner")) .parse_next(input) } diff --git a/src/sqlparser/src/parser_v2/mod.rs b/src/sqlparser/src/parser_v2/mod.rs index 7012b9880adc..3b29a94e933b 100644 --- a/src/sqlparser/src/parser_v2/mod.rs +++ b/src/sqlparser/src/parser_v2/mod.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use winnow::error::ContextError; +use winnow::error::{ContextError, StrContext}; use winnow::stream::{Location, Stream, StreamIsPartial}; use winnow::token::any; use winnow::{PResult, Parser, Stateful}; @@ -51,6 +51,7 @@ where Token::Word(w) if w.keyword != Keyword::NoKeyword => Some(w.keyword), _ => None, }) + .context(StrContext::Label("keyword")) .parse_next(input) } From 5a3b34d557df87454ccce5030271014b1d9e1616 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Wed, 22 May 2024 17:39:30 +0800 Subject: [PATCH 08/39] introduce TokenStreamWrapper for better readability Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/impl_.rs | 139 +++++++++++++++++++++++++++ src/sqlparser/src/parser_v2/mod.rs | 3 + 2 files changed, 142 insertions(+) create mode 100644 src/sqlparser/src/parser_v2/impl_.rs diff --git a/src/sqlparser/src/parser_v2/impl_.rs b/src/sqlparser/src/parser_v2/impl_.rs new file mode 100644 index 000000000000..1d9c7b8833dc --- /dev/null +++ b/src/sqlparser/src/parser_v2/impl_.rs @@ -0,0 +1,139 @@ +use winnow::stream::{Checkpoint, Offset, SliceLen, Stream, StreamIsPartial, UpdateSlice}; + +use crate::tokenizer::{Token, TokenWithLocation, Whitespace}; + +#[derive(Copy, Clone, Debug)] +pub struct CheckpointWrapper<'a>(Checkpoint<&'a [TokenWithLocation], &'a [TokenWithLocation]>); + +impl<'a> Offset> for CheckpointWrapper<'a> { + #[inline(always)] + fn offset_from(&self, start: &Self) -> usize { + self.0.offset_from(&start.0) + } +} + +#[derive(Default, Copy, Clone)] +pub struct TokenStreamWrapper<'a> { + pub tokens: &'a [TokenWithLocation], +} + +impl<'a> std::fmt::Debug for TokenStreamWrapper<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for tok in self.tokens { + let tok = &tok.token; + if matches!(tok, Token::Whitespace(Whitespace::Newline)) { + write!(f, "\\n")?; + } else { + write!(f, "{}", tok)?; + } + } + Ok(()) + } +} + +impl<'a> Offset> for TokenStreamWrapper<'a> { + #[inline(always)] + fn offset_from(&self, start: &Self) -> usize { + self.tokens.offset_from(&start.tokens) + } +} + +impl<'a> Offset> for TokenStreamWrapper<'a> { + #[inline(always)] + fn offset_from(&self, start: &CheckpointWrapper<'a>) -> usize { + self.tokens.offset_from(&start.0) + } +} + +impl<'a> SliceLen for TokenStreamWrapper<'a> { + #[inline(always)] + fn slice_len(&self) -> usize { + self.tokens.len() + } +} + +impl<'a> StreamIsPartial for TokenStreamWrapper<'a> { + type PartialState = <&'a [TokenWithLocation] as StreamIsPartial>::PartialState; + + #[must_use] + #[inline(always)] + fn complete(&mut self) -> Self::PartialState { + self.tokens.complete() + } + + #[inline(always)] + fn restore_partial(&mut self, state: Self::PartialState) { + self.tokens.restore_partial(state) + } + + #[inline(always)] + fn is_partial_supported() -> bool { + <&'a [TokenWithLocation] as StreamIsPartial>::is_partial_supported() + } +} + +impl<'a> Stream for TokenStreamWrapper<'a> { + type Checkpoint = CheckpointWrapper<'a>; + type IterOffsets = <&'a [TokenWithLocation] as Stream>::IterOffsets; + type Slice = TokenStreamWrapper<'a>; + type Token = <&'a [TokenWithLocation] as Stream>::Token; + + #[inline(always)] + fn iter_offsets(&self) -> Self::IterOffsets { + self.tokens.iter_offsets() + } + + #[inline(always)] + fn eof_offset(&self) -> usize { + self.tokens.eof_offset() + } + + #[inline(always)] + fn next_token(&mut self) -> Option { + self.tokens.next_token() + } + + #[inline(always)] + fn offset_for

(&self, predicate: P) -> Option + where + P: Fn(Self::Token) -> bool, + { + self.tokens.offset_for(predicate) + } + + #[inline(always)] + fn offset_at(&self, tokens: usize) -> Result { + self.tokens.offset_at(tokens) + } + + #[inline(always)] + fn next_slice(&mut self, offset: usize) -> Self::Slice { + TokenStreamWrapper { + tokens: &self.tokens.next_slice(offset), + } + } + + #[inline(always)] + fn checkpoint(&self) -> Self::Checkpoint { + CheckpointWrapper(self.tokens.checkpoint()) + } + + #[inline(always)] + fn reset(&mut self, checkpoint: &Self::Checkpoint) { + self.tokens.reset(&checkpoint.0) + } + + #[inline(always)] + fn raw(&self) -> &dyn std::fmt::Debug { + self + } +} + +impl<'a> UpdateSlice for TokenStreamWrapper<'a> { + #[inline(always)] + fn update_slice(self, inner: Self::Slice) -> Self { + TokenStreamWrapper { + tokens: self.tokens.update_slice(inner.tokens), + } + } +} diff --git a/src/sqlparser/src/parser_v2/mod.rs b/src/sqlparser/src/parser_v2/mod.rs index 3b29a94e933b..bbf8d1dcc091 100644 --- a/src/sqlparser/src/parser_v2/mod.rs +++ b/src/sqlparser/src/parser_v2/mod.rs @@ -20,9 +20,11 @@ use crate::keywords::{self, Keyword}; use crate::tokenizer::{Token, TokenWithLocation}; mod data_type; +mod impl_; mod number; pub(crate) use data_type::*; +pub(crate) use impl_::TokenStreamWrapper; pub(crate) use number::*; pub trait TokenStream: @@ -104,6 +106,7 @@ where move |input: &mut S| -> PResult { let state = State::default(); let input2 = std::mem::take(input); + dbg!(&input2); let mut stateful = Stateful { input: input2, state, From 6c9683ac27fc82c0473b49d62c5a525b9435334a Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Wed, 22 May 2024 17:40:13 +0800 Subject: [PATCH 09/39] fix parse_v2 Signed-off-by: TennyZhuang --- src/sqlparser/src/parser.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index 47029a6b1564..51d9b100e11b 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -184,14 +184,16 @@ impl Parser { pub(crate) fn parse_v2<'a, O>( &'a mut self, mut parse_next: impl winnow::Parser< - winnow::Located<&'a [TokenWithLocation]>, + winnow::Located>, O, winnow::error::ContextError, >, ) -> Result { use winnow::stream::Location; - let mut token_stream = winnow::Located::new(&*self.tokens); + let mut token_stream = winnow::Located::new(parser_v2::TokenStreamWrapper { + tokens: &self.tokens[self.index..], + }); let output = parse_next.parse_next(&mut token_stream).map_err(|e| { ParserError::ParserError(format!( "Unexpected {}: {}", From 905fa6ac44ac466b94a9286039e228630a9d837c Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Wed, 22 May 2024 18:06:22 +0800 Subject: [PATCH 10/39] handle whitespace Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/mod.rs | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/sqlparser/src/parser_v2/mod.rs b/src/sqlparser/src/parser_v2/mod.rs index bbf8d1dcc091..faa777bed5bc 100644 --- a/src/sqlparser/src/parser_v2/mod.rs +++ b/src/sqlparser/src/parser_v2/mod.rs @@ -10,9 +10,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +use winnow::combinator::preceded; use winnow::error::{ContextError, StrContext}; -use winnow::stream::{Location, Stream, StreamIsPartial}; -use winnow::token::any; +use winnow::stream::{ContainsToken, Location, Stream, StreamIsPartial}; +use winnow::token::{any, take_while}; use winnow::{PResult, Parser, Stateful}; use crate::ast::Ident; @@ -37,13 +38,28 @@ impl TokenStream for S where { } -fn token(input: &mut S) -> PResult +fn any_token(input: &mut S) -> PResult where S: TokenStream, { any(input) } +fn token(input: &mut S) -> PResult +where + S: TokenStream, +{ + struct WhiteSpace; + + impl ContainsToken for WhiteSpace { + fn contains_token(&self, token: TokenWithLocation) -> bool { + matches!(token.token, Token::Whitespace(_)) + } + } + + preceded(take_while(0.., WhiteSpace), any_token).parse_next(input) +} + fn keyword(input: &mut S) -> PResult where S: TokenStream, From 80208efd3eb4e283ad83f46fa6837705c0fb060d Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Wed, 22 May 2024 18:07:29 +0800 Subject: [PATCH 11/39] use preceed Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index f50058ee4b49..50f415d30e6a 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -1,7 +1,7 @@ use core::cell::RefCell; use std::rc::Rc; -use winnow::combinator::{alt, delimited, dispatch, empty, fail, opt, separated, seq}; +use winnow::combinator::{alt, delimited, dispatch, empty, fail, opt, preceded, separated, seq}; use winnow::error::StrContext; use winnow::{PResult, Parser, Stateful}; @@ -110,10 +110,7 @@ where let precision_and_scale = || { opt(delimited( Token::LParen, - ( - literal_uint, - opt((Token::Comma, literal_uint).map(|(_, x)| x)), - ), + (literal_uint, opt(preceded(Token::Comma, literal_uint))), Token::RParen, )) .map(|p| match p { From 9c9472f45b6cc9b3ed22b9246a0b2e435cd10178 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Wed, 22 May 2024 18:09:04 +0800 Subject: [PATCH 12/39] remove dbg Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sqlparser/src/parser_v2/mod.rs b/src/sqlparser/src/parser_v2/mod.rs index faa777bed5bc..6ebb666f2abf 100644 --- a/src/sqlparser/src/parser_v2/mod.rs +++ b/src/sqlparser/src/parser_v2/mod.rs @@ -122,7 +122,6 @@ where move |input: &mut S| -> PResult { let state = State::default(); let input2 = std::mem::take(input); - dbg!(&input2); let mut stateful = Stateful { input: input2, state, From 2d6692c5d4a6ae7582a5447c82e9132c9a7152cc Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Wed, 22 May 2024 18:23:46 +0800 Subject: [PATCH 13/39] fix custom data type Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 50f415d30e6a..3a7b031663cb 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -9,7 +9,7 @@ use super::{ identifier_non_reserved, keyword, literal_uint, precision_in_range, token, with_state, TokenStream, }; -use crate::ast::{DataType, StructField}; +use crate::ast::{DataType, Ident, ObjectName, StructField}; use crate::keywords::Keyword; use crate::tokenizer::Token; @@ -150,10 +150,15 @@ where alt(( keywords, - // JSONB is not a keyword, but a special data type. - token - .verify(|t| matches!(&t.token, Token::Word(w) if w.value.eq_ignore_ascii_case("jsonb"))) - .value(DataType::Jsonb), + token.verify_map(|t| match t.token { + // JSONB is not a keyword, but a special data type. + Token::Word(w) if w.value.eq_ignore_ascii_case("jsonb") => Some(DataType::Jsonb), + // FIXME: Really parse a full object name here. + Token::Word(w) => Some(DataType::Custom(ObjectName::from(vec![ + Ident::new_unchecked(w.value), + ]))), + _ => None, + }), )) .context(StrContext::Label("data_type_inner")) .parse_next(input) From 7e8166dcc442d7284f8dfeeeda38bd8f9d9a8fc5 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 12:14:27 +0800 Subject: [PATCH 14/39] fix array parsing Signed-off-by: TennyZhuang --- Cargo.lock | 4 ++-- src/sqlparser/Cargo.toml | 7 ++----- src/sqlparser/src/parser_v2/data_type.rs | 18 +++++++----------- 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e7ba267f42d1..7827afb0a348 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11413,7 +11413,7 @@ dependencies = [ "serde", "tracing", "tracing-subscriber", - "winnow 0.6.8 (git+https://github.com/TennyZhuang/winnow.git?rev=509121449b451f0faa1b698b4e8cb36dfaa1a4b5)", + "winnow 0.6.8 (git+https://github.com/TennyZhuang/winnow.git?rev=a6b1f04)", "workspace-hack", ] @@ -16231,7 +16231,7 @@ dependencies = [ [[package]] name = "winnow" version = "0.6.8" -source = "git+https://github.com/TennyZhuang/winnow.git?rev=509121449b451f0faa1b698b4e8cb36dfaa1a4b5#509121449b451f0faa1b698b4e8cb36dfaa1a4b5" +source = "git+https://github.com/TennyZhuang/winnow.git?rev=a6b1f04#a6b1f04cbe9b39d218da5a121e610144ebf961b0" dependencies = [ "memchr", ] diff --git a/src/sqlparser/Cargo.toml b/src/sqlparser/Cargo.toml index 2798d3852402..f55a7823e5f5 100644 --- a/src/sqlparser/Cargo.toml +++ b/src/sqlparser/Cargo.toml @@ -1,10 +1,7 @@ [package] name = "risingwave_sqlparser" license = "Apache-2.0" -include = [ - "src/**/*.rs", - "Cargo.toml", -] +include = ["src/**/*.rs", "Cargo.toml"] version = { workspace = true } edition = { workspace = true } homepage = { workspace = true } @@ -29,7 +26,7 @@ itertools = { workspace = true } serde = { version = "1.0", features = ["derive"], optional = true } tracing = "0.1" tracing-subscriber = "0.3" -winnow = { version = "0.6.8", git = "https://github.com/TennyZhuang/winnow.git", rev = "509121449b451f0faa1b698b4e8cb36dfaa1a4b5" } +winnow = { version = "0.6.8", git = "https://github.com/TennyZhuang/winnow.git", rev = "a6b1f04" } [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../workspace-hack" } diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 3a7b031663cb..33f5caa7aa22 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -1,7 +1,9 @@ use core::cell::RefCell; use std::rc::Rc; -use winnow::combinator::{alt, delimited, dispatch, empty, fail, opt, preceded, separated, seq}; +use winnow::combinator::{ + alt, delimited, dispatch, empty, fail, opt, preceded, repeat, separated, seq, Repeat, +}; use winnow::error::StrContext; use winnow::{PResult, Parser, Stateful}; @@ -81,16 +83,10 @@ fn data_type_stateful(input: &mut StatefulStream) -> PResult where S: TokenStream, { - ( - data_type_stateful_inner, - opt((Token::LBracket, Token::RBracket)), - ) - .map(|(dt, is_array)| { - if is_array.is_some() { - DataType::Array(Box::new(dt)) - } else { - dt - } + repeat(0.., (Token::LBracket, Token::RBracket)) + .fold1(data_type_stateful_inner, |mut acc, _| { + acc = DataType::Array(Box::new(acc)); + acc }) .parse_next(input) } From 686217c444aaf356528d0b4af8058d8b4be3f62d Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 12:16:11 +0800 Subject: [PATCH 15/39] add a context Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 33f5caa7aa22..b46606f9e5ff 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -67,6 +67,7 @@ where ), consume_close, ) + .context(StrContext::Label("struct_data_type")) .parse_next(input) } From 51f9265b944cc4ae3f448da45e096b3c55290540 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 12:17:21 +0800 Subject: [PATCH 16/39] fix unused import Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index b46606f9e5ff..ce7d81396452 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -2,7 +2,7 @@ use core::cell::RefCell; use std::rc::Rc; use winnow::combinator::{ - alt, delimited, dispatch, empty, fail, opt, preceded, repeat, separated, seq, Repeat, + alt, delimited, dispatch, empty, fail, opt, preceded, repeat, separated, seq, }; use winnow::error::StrContext; use winnow::{PResult, Parser, Stateful}; From 89f67e5e9b79437641dc645e0d3583388595eecf Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 12:52:57 +0800 Subject: [PATCH 17/39] fix struct parsing Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index ce7d81396452..b200710a8a39 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -2,7 +2,7 @@ use core::cell::RefCell; use std::rc::Rc; use winnow::combinator::{ - alt, delimited, dispatch, empty, fail, opt, preceded, repeat, separated, seq, + alt, cut_err, delimited, dispatch, empty, fail, opt, preceded, repeat, separated, seq, }; use winnow::error::StrContext; use winnow::{PResult, Parser, Stateful}; @@ -59,7 +59,6 @@ where seq! { StructField { name: identifier_non_reserved, - _: Token::Colon, data_type: data_type_stateful, } }, @@ -137,9 +136,9 @@ where Keyword::INTERVAL => empty.value(DataType::Interval), Keyword::REGCLASS => empty.value(DataType::Regclass), Keyword::REGPROC => empty.value(DataType::Regproc), - Keyword::STRUCT => struct_data_type.map(DataType::Struct), + Keyword::STRUCT => cut_err(struct_data_type).map(DataType::Struct), Keyword::BYTEA => empty.value(DataType::Bytea), - Keyword::NUMERIC | Keyword::DECIMAL | Keyword::DEC => precision_and_scale().map(|(precision, scale)| { + Keyword::NUMERIC | Keyword::DECIMAL | Keyword::DEC => cut_err(precision_and_scale()).map(|(precision, scale)| { DataType::Decimal(precision, scale) }), _ => fail, From 6b62577718c30d5196779b2bfefdb73ad2d42f70 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 16:23:29 +0800 Subject: [PATCH 18/39] fix float parsing Signed-off-by: TennyZhuang --- Cargo.lock | 13 +++++++------ src/sqlparser/Cargo.toml | 1 + src/sqlparser/src/parser.rs | 9 ++++++++- src/sqlparser/src/parser_v2/data_type.rs | 2 +- src/sqlparser/src/parser_v2/number.rs | 22 +++++++++++++++++++--- 5 files changed, 36 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7827afb0a348..3dbe9b24b70e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.81" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" dependencies = [ "backtrace", ] @@ -11411,6 +11411,7 @@ dependencies = [ "itertools 0.12.1", "matches", "serde", + "thiserror", "tracing", "tracing-subscriber", "winnow 0.6.8 (git+https://github.com/TennyZhuang/winnow.git?rev=a6b1f04)", @@ -14111,9 +14112,9 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.58" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" dependencies = [ "thiserror-impl", ] @@ -14142,9 +14143,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "1.0.58" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", diff --git a/src/sqlparser/Cargo.toml b/src/sqlparser/Cargo.toml index f55a7823e5f5..8c20eb9bf2f2 100644 --- a/src/sqlparser/Cargo.toml +++ b/src/sqlparser/Cargo.toml @@ -24,6 +24,7 @@ normal = ["workspace-hack"] [dependencies] itertools = { workspace = true } serde = { version = "1.0", features = ["derive"], optional = true } +thiserror = "1.0.61" tracing = "0.1" tracing-subscriber = "0.3" winnow = { version = "0.6.8", git = "https://github.com/TennyZhuang/winnow.git", rev = "a6b1f04" } diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index 51d9b100e11b..4f32e0627b3e 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -195,10 +195,17 @@ impl Parser { tokens: &self.tokens[self.index..], }); let output = parse_next.parse_next(&mut token_stream).map_err(|e| { + let msg = if let Some(e) = e.into_inner() + && let Some(cause) = e.cause() + { + cause.to_string() + } else { + "".to_string() + }; ParserError::ParserError(format!( "Unexpected {}: {}", self.tokens[self.index + token_stream.location()], - e + msg )) }); let offset = token_stream.location(); diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index b200710a8a39..9c67e44ded21 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -83,7 +83,7 @@ fn data_type_stateful(input: &mut StatefulStream) -> PResult where S: TokenStream, { - repeat(0.., (Token::LBracket, Token::RBracket)) + repeat(0.., (Token::LBracket, cut_err(Token::RBracket))) .fold1(data_type_stateful_inner, |mut acc, _| { acc = DataType::Array(Box::new(acc)); acc diff --git a/src/sqlparser/src/parser_v2/number.rs b/src/sqlparser/src/parser_v2/number.rs index 2efaf00af2f3..494ed6960d40 100644 --- a/src/sqlparser/src/parser_v2/number.rs +++ b/src/sqlparser/src/parser_v2/number.rs @@ -1,6 +1,6 @@ use core::ops::RangeBounds; -use winnow::combinator::delimited; +use winnow::combinator::{cut_err, delimited}; use winnow::error::ContextError; use winnow::{PResult, Parser}; @@ -30,9 +30,25 @@ where token_number.try_map(|s| s.parse::()).parse_next(input) } -pub fn precision_in_range(range: impl RangeBounds) -> impl Parser +pub fn precision_in_range( + range: impl RangeBounds + std::fmt::Debug, +) -> impl Parser where S: TokenStream, { - delimited(Token::LParen, literal_uint, Token::LParen).verify(move |v| range.contains(v)) + #[derive(Debug, thiserror::Error)] + enum Error { + #[error("Precision must be in range {0:?}")] + OutOfRange(String), + } + + cut_err( + delimited(Token::LParen, literal_uint, Token::LParen).try_map(move |v| { + if range.contains(&v) { + Ok(v) + } else { + Err(Error::OutOfRange(format!("{:?}", range))) + } + }), + ) } From 32d29db8cc8722a73c0a3503408031d346b966d0 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 16:24:02 +0800 Subject: [PATCH 19/39] fix precision parsing Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/number.rs | 2 +- src/sqlparser/tests/testdata/array.yaml | 8 ++------ src/sqlparser/tests/testdata/select.yaml | 4 ++-- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/sqlparser/src/parser_v2/number.rs b/src/sqlparser/src/parser_v2/number.rs index 494ed6960d40..74aef394d342 100644 --- a/src/sqlparser/src/parser_v2/number.rs +++ b/src/sqlparser/src/parser_v2/number.rs @@ -43,7 +43,7 @@ where } cut_err( - delimited(Token::LParen, literal_uint, Token::LParen).try_map(move |v| { + delimited(Token::LParen, literal_uint, Token::RParen).try_map(move |v| { if range.contains(&v) { Ok(v) } else { diff --git a/src/sqlparser/tests/testdata/array.yaml b/src/sqlparser/tests/testdata/array.yaml index 9af94c041fdc..28e2618ba732 100644 --- a/src/sqlparser/tests/testdata/array.yaml +++ b/src/sqlparser/tests/testdata/array.yaml @@ -6,13 +6,9 @@ - input: CREATE TABLE t(a int[][][]); formatted_sql: CREATE TABLE t (a INT[][][]) - input: CREATE TABLE t(a int[); - error_msg: |- - sql parser error: Expected ], found: ) at line:1, column:23 - Near "CREATE TABLE t(a int[" + error_msg: 'sql parser error: Unexpected ) at line:1, column:23: ' - input: CREATE TABLE t(a int[[]); - error_msg: |- - sql parser error: Expected ], found: [ at line:1, column:23 - Near "CREATE TABLE t(a int[" + error_msg: 'sql parser error: Unexpected [ at line:1, column:23: ' - input: CREATE TABLE t(a int]); error_msg: |- sql parser error: Expected ',' or ')' after column definition, found: ] at line:1, column:22 diff --git a/src/sqlparser/tests/testdata/select.yaml b/src/sqlparser/tests/testdata/select.yaml index b8bcfe18e87e..424d97eec721 100644 --- a/src/sqlparser/tests/testdata/select.yaml +++ b/src/sqlparser/tests/testdata/select.yaml @@ -120,9 +120,9 @@ - input: SELECT 0x error_msg: 'sql parser error: incomplete integer literal at Line: 1, Column 8' - input: SELECT 1::float(0) - error_msg: 'sql parser error: precision for type float must be at least 1 bit' + error_msg: 'sql parser error: Unexpected ( at line:1, column:16: Precision must be in range "1..53"' - input: SELECT 1::float(54) - error_msg: 'sql parser error: precision for type float must be less than 54 bits' + error_msg: 'sql parser error: Unexpected ( at line:1, column:16: Precision must be in range "1..53"' - input: SELECT 1::int(2) error_msg: |- sql parser error: Expected end of statement, found: ( at line:1, column:14 From c76c314e0d29363ef70a1738705a662d96a5ed32 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 16:47:19 +0800 Subject: [PATCH 20/39] add many comments Signed-off-by: TennyZhuang --- src/sqlparser/src/parser.rs | 3 +++ src/sqlparser/src/parser_v2/data_type.rs | 24 ++++++++++++++++++++++++ src/sqlparser/src/parser_v2/impl_.rs | 13 +++++++++++++ src/sqlparser/src/parser_v2/mod.rs | 14 ++++++++++++++ src/sqlparser/src/parser_v2/number.rs | 18 +++++++++++++++++- 5 files changed, 71 insertions(+), 1 deletion(-) diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index 4f32e0627b3e..106a93e1dce9 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -181,6 +181,9 @@ impl Parser { Parser { tokens, index: 0 } } + /// Adaptor for [`parser_v2`]. + /// + /// You can call a v2 parser from original parser by using this method. pub(crate) fn parse_v2<'a, O>( &'a mut self, mut parse_next: impl winnow::Parser< diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 9c67e44ded21..e8922dce7034 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -1,3 +1,20 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Parsers for data types. +//! +//! This module contains parsers for data types. To handle the anbiguity of `>>` and `> >` in struct definition, +//! we need to use a stateful parser here. See [`with_state`] for more information. + use core::cell::RefCell; use std::rc::Rc; @@ -25,6 +42,7 @@ struct DataTypeParsingState { type StatefulStream = Stateful; +/// Consume struct type definitions fn struct_data_type(input: &mut StatefulStream) -> PResult> where S: TokenStream, @@ -32,6 +50,7 @@ where let remaining_close1 = input.state.remaining_close.clone(); let remaining_close2 = input.state.remaining_close.clone(); + // Consume an abstract `>`, it may be the `remaining_close1` flag set by previous `>>`. let consume_close = alt(( move |input: &mut StatefulStream| -> PResult<()> { if *remaining_close1.borrow() { @@ -70,6 +89,9 @@ where .parse_next(input) } +/// Consume a data type definition. +/// +/// The parser is the main entry point for data type parsing. pub fn data_type(input: &mut S) -> PResult where S: TokenStream, @@ -79,6 +101,7 @@ where .parse_next(input) } +/// Data type parsing with stateful stream. fn data_type_stateful(input: &mut StatefulStream) -> PResult where S: TokenStream, @@ -91,6 +114,7 @@ where .parse_next(input) } +/// Consume a data type except [`DataType::Array`]. fn data_type_stateful_inner(input: &mut StatefulStream) -> PResult where S: TokenStream, diff --git a/src/sqlparser/src/parser_v2/impl_.rs b/src/sqlparser/src/parser_v2/impl_.rs index 1d9c7b8833dc..2ca4aa86d0a4 100644 --- a/src/sqlparser/src/parser_v2/impl_.rs +++ b/src/sqlparser/src/parser_v2/impl_.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use winnow::stream::{Checkpoint, Offset, SliceLen, Stream, StreamIsPartial, UpdateSlice}; use crate::tokenizer::{Token, TokenWithLocation, Whitespace}; @@ -12,6 +24,7 @@ impl<'a> Offset> for CheckpointWrapper<'a> { } } +/// Customized wrapper that implements [`TokenStream`], override [`Debug`] implementation for better diagnostics. #[derive(Default, Copy, Clone)] pub struct TokenStreamWrapper<'a> { pub tokens: &'a [TokenWithLocation], diff --git a/src/sqlparser/src/parser_v2/mod.rs b/src/sqlparser/src/parser_v2/mod.rs index 6ebb666f2abf..882f2035b612 100644 --- a/src/sqlparser/src/parser_v2/mod.rs +++ b/src/sqlparser/src/parser_v2/mod.rs @@ -28,6 +28,9 @@ pub(crate) use data_type::*; pub(crate) use impl_::TokenStreamWrapper; pub(crate) use number::*; +/// Bundle trait requirements from winnow, so that we don't need to write them everywhere. +/// +/// All combinators should accept a generic `S` that implements `TokenStream`. pub trait TokenStream: Stream + StreamIsPartial + Location + Default { @@ -38,6 +41,7 @@ impl TokenStream for S where { } +/// Consume any token, including whitespaces. In almost all cases, you should use [`token`] instead. fn any_token(input: &mut S) -> PResult where S: TokenStream, @@ -45,6 +49,9 @@ where any(input) } +/// Consume any non-whitespace token. +/// +/// If you need to consume a specific token, use [`Token::?`][Token] directly, which already implements [`Parser`]. fn token(input: &mut S) -> PResult where S: TokenStream, @@ -60,6 +67,9 @@ where preceded(take_while(0.., WhiteSpace), any_token).parse_next(input) } +/// Consume a keyword. +/// +/// If you need to consume a specific keyword, use [`Keyword::?`][Keyword] directly, which already implements [`Parser`]. fn keyword(input: &mut S) -> PResult where S: TokenStream, @@ -98,6 +108,7 @@ where } } +/// Consume an identifier that is not a reserved keyword. fn identifier_non_reserved(input: &mut S) -> PResult where S: TokenStream, @@ -113,6 +124,9 @@ where .parse_next(input) } +/// Accept a subparser contains a given state. +/// +/// The state will be constructed using [`Default::default()`]. fn with_state(mut parse_next: ParseNext) -> impl Parser where S: TokenStream, diff --git a/src/sqlparser/src/parser_v2/number.rs b/src/sqlparser/src/parser_v2/number.rs index 74aef394d342..76500599ee92 100644 --- a/src/sqlparser/src/parser_v2/number.rs +++ b/src/sqlparser/src/parser_v2/number.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use core::ops::RangeBounds; use winnow::combinator::{cut_err, delimited}; @@ -7,6 +19,7 @@ use winnow::{PResult, Parser}; use super::{token, TokenStream}; use crate::tokenizer::Token; +/// Consume a [number][Token::Number] from token. pub fn token_number(input: &mut S) -> PResult where S: TokenStream, @@ -22,7 +35,7 @@ where .parse_next(input) } -/// Parse an unsigned literal integer/long +/// Consume an unsigned literal integer/long pub fn literal_uint(input: &mut S) -> PResult where S: TokenStream, @@ -30,6 +43,9 @@ where token_number.try_map(|s| s.parse::()).parse_next(input) } +/// Consume a precision definition in some types, e.g. `FLOAT(32)`. +/// +/// The precision must be in the given range. pub fn precision_in_range( range: impl RangeBounds + std::fmt::Debug, ) -> impl Parser From d00ec9f3d032cc67cb85165b1674c62ab914e701 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 16:47:26 +0800 Subject: [PATCH 21/39] fix clippy Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/impl_.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlparser/src/parser_v2/impl_.rs b/src/sqlparser/src/parser_v2/impl_.rs index 2ca4aa86d0a4..283aca68f6d3 100644 --- a/src/sqlparser/src/parser_v2/impl_.rs +++ b/src/sqlparser/src/parser_v2/impl_.rs @@ -122,7 +122,7 @@ impl<'a> Stream for TokenStreamWrapper<'a> { #[inline(always)] fn next_slice(&mut self, offset: usize) -> Self::Slice { TokenStreamWrapper { - tokens: &self.tokens.next_slice(offset), + tokens: self.tokens.next_slice(offset), } } From 60f2019230af2c5e6bcc6aededf950d685b2adc0 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 16:57:38 +0800 Subject: [PATCH 22/39] simplify error def Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/number.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/sqlparser/src/parser_v2/number.rs b/src/sqlparser/src/parser_v2/number.rs index 76500599ee92..5c87226d89e6 100644 --- a/src/sqlparser/src/parser_v2/number.rs +++ b/src/sqlparser/src/parser_v2/number.rs @@ -53,17 +53,15 @@ where S: TokenStream, { #[derive(Debug, thiserror::Error)] - enum Error { - #[error("Precision must be in range {0:?}")] - OutOfRange(String), - } + #[error("Precision must be in range {0:?}")] + struct OutOfRange(String); cut_err( delimited(Token::LParen, literal_uint, Token::RParen).try_map(move |v| { if range.contains(&v) { Ok(v) } else { - Err(Error::OutOfRange(format!("{:?}", range))) + Err(OutOfRange(format!("{:?}", range))) } }), ) From 3b366ab679e7e2c06d929359155a55a225cb2b20 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 17:16:05 +0800 Subject: [PATCH 23/39] fix precision parsing Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/number.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/sqlparser/src/parser_v2/number.rs b/src/sqlparser/src/parser_v2/number.rs index 5c87226d89e6..c9c1722da1e6 100644 --- a/src/sqlparser/src/parser_v2/number.rs +++ b/src/sqlparser/src/parser_v2/number.rs @@ -56,13 +56,15 @@ where #[error("Precision must be in range {0:?}")] struct OutOfRange(String); - cut_err( - delimited(Token::LParen, literal_uint, Token::RParen).try_map(move |v| { + delimited( + Token::LParen, + cut_err(literal_uint.try_map(move |v| { if range.contains(&v) { Ok(v) } else { Err(OutOfRange(format!("{:?}", range))) } - }), + })), + cut_err(Token::RParen), ) } From 26c7499f2fc7b34fc4afe417df78804aa5192dc8 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 17:24:05 +0800 Subject: [PATCH 24/39] fix double Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index e8922dce7034..3900c0e9ccef 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -143,7 +143,7 @@ where Keyword::BOOLEAN | Keyword::BOOL => empty.value(DataType::Boolean), Keyword::FLOAT => opt(precision_in_range(1..53)).map(DataType::Float), Keyword::REAL => empty.value(DataType::Real), - Keyword::DOUBLE => Keyword::PRECISION.value(DataType::Double), + Keyword::DOUBLE => opt(Keyword::PRECISION).value(DataType::Double), Keyword::SMALLINT => empty.value(DataType::SmallInt), Keyword::INT | Keyword::INTEGER => empty.value(DataType::Int), Keyword::BIGINT => empty.value(DataType::BigInt), From b26810d93df93f1607737d92c774a94c4f3080ec Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 17:47:29 +0800 Subject: [PATCH 25/39] fix custom type parsing Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 20 ++++++++++---------- src/sqlparser/src/parser_v2/mod.rs | 16 ++++++++++++++-- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 3900c0e9ccef..2be45e10105b 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -25,10 +25,10 @@ use winnow::error::StrContext; use winnow::{PResult, Parser, Stateful}; use super::{ - identifier_non_reserved, keyword, literal_uint, precision_in_range, token, with_state, + identifier_non_reserved, keyword, literal_uint, object_name, precision_in_range, with_state, TokenStream, }; -use crate::ast::{DataType, Ident, ObjectName, StructField}; +use crate::ast::{DataType, StructField}; use crate::keywords::Keyword; use crate::tokenizer::Token; @@ -170,16 +170,16 @@ where alt(( keywords, - token.verify_map(|t| match t.token { - // JSONB is not a keyword, but a special data type. - Token::Word(w) if w.value.eq_ignore_ascii_case("jsonb") => Some(DataType::Jsonb), - // FIXME: Really parse a full object name here. - Token::Word(w) => Some(DataType::Custom(ObjectName::from(vec![ - Ident::new_unchecked(w.value), - ]))), - _ => None, + object_name.map(|name| { + if name.to_string().eq_ignore_ascii_case("jsonb") { + // JSONB is not a keyword + DataType::Jsonb + } else { + DataType::Custom(name) + } }), )) + .context(StrContext::Label("non_keyword_data_type")) .context(StrContext::Label("data_type_inner")) .parse_next(input) } diff --git a/src/sqlparser/src/parser_v2/mod.rs b/src/sqlparser/src/parser_v2/mod.rs index 882f2035b612..0e52bad14873 100644 --- a/src/sqlparser/src/parser_v2/mod.rs +++ b/src/sqlparser/src/parser_v2/mod.rs @@ -10,13 +10,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use winnow::combinator::preceded; +use winnow::combinator::{preceded, separated}; use winnow::error::{ContextError, StrContext}; use winnow::stream::{ContainsToken, Location, Stream, StreamIsPartial}; use winnow::token::{any, take_while}; use winnow::{PResult, Parser, Stateful}; -use crate::ast::Ident; +use crate::ast::{Ident, ObjectName}; use crate::keywords::{self, Keyword}; use crate::tokenizer::{Token, TokenWithLocation}; @@ -124,6 +124,18 @@ where .parse_next(input) } +/// Consume an object name. +/// +/// FIXME: Object name is extremely complex, we only handle a subset here. +fn object_name(input: &mut S) -> PResult +where + S: TokenStream, +{ + separated(1.., identifier_non_reserved, Token::Period) + .map(ObjectName) + .parse_next(input) +} + /// Accept a subparser contains a given state. /// /// The state will be constructed using [`Default::default()`]. From 5cdab1983ac5e1f0dd5fd9f7620922b5b180b221 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 17:49:31 +0800 Subject: [PATCH 26/39] 1..54 Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 2be45e10105b..ece991f2bcde 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -141,7 +141,7 @@ where let keywords = dispatch! {keyword; Keyword::BOOLEAN | Keyword::BOOL => empty.value(DataType::Boolean), - Keyword::FLOAT => opt(precision_in_range(1..53)).map(DataType::Float), + Keyword::FLOAT => opt(precision_in_range(1..54)).map(DataType::Float), Keyword::REAL => empty.value(DataType::Real), Keyword::DOUBLE => opt(Keyword::PRECISION).value(DataType::Double), Keyword::SMALLINT => empty.value(DataType::SmallInt), From 79ce4ea49348c54d34459a76cddec670c2b7fe20 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 18:04:28 +0800 Subject: [PATCH 27/39] support TEXT Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index ece991f2bcde..e9211cb08bb0 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -160,6 +160,7 @@ where Keyword::INTERVAL => empty.value(DataType::Interval), Keyword::REGCLASS => empty.value(DataType::Regclass), Keyword::REGPROC => empty.value(DataType::Regproc), + Keyword::TEXT => empty.value(DataType::Text), Keyword::STRUCT => cut_err(struct_data_type).map(DataType::Struct), Keyword::BYTEA => empty.value(DataType::Bytea), Keyword::NUMERIC | Keyword::DECIMAL | Keyword::DEC => cut_err(precision_and_scale()).map(|(precision, scale)| { From df6137bb86627e6703f6a9609ac62b1d255dfec0 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 20:36:57 +0800 Subject: [PATCH 28/39] fix Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/impl_.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlparser/src/parser_v2/impl_.rs b/src/sqlparser/src/parser_v2/impl_.rs index 283aca68f6d3..8e6498d5fde5 100644 --- a/src/sqlparser/src/parser_v2/impl_.rs +++ b/src/sqlparser/src/parser_v2/impl_.rs @@ -24,7 +24,7 @@ impl<'a> Offset> for CheckpointWrapper<'a> { } } -/// Customized wrapper that implements [`TokenStream`], override [`Debug`] implementation for better diagnostics. +/// Customized wrapper that implements [`TokenStream`][super::TokenStream], override [`Debug`] implementation for better diagnostics. #[derive(Default, Copy, Clone)] pub struct TokenStreamWrapper<'a> { pub tokens: &'a [TokenWithLocation], From 450441978462ae44ad2826120d305336cd201083 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 21:23:18 +0800 Subject: [PATCH 29/39] fix ut Signed-off-by: TennyZhuang --- src/sqlparser/tests/testdata/select.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sqlparser/tests/testdata/select.yaml b/src/sqlparser/tests/testdata/select.yaml index 424d97eec721..a8cf8f612240 100644 --- a/src/sqlparser/tests/testdata/select.yaml +++ b/src/sqlparser/tests/testdata/select.yaml @@ -120,9 +120,9 @@ - input: SELECT 0x error_msg: 'sql parser error: incomplete integer literal at Line: 1, Column 8' - input: SELECT 1::float(0) - error_msg: 'sql parser error: Unexpected ( at line:1, column:16: Precision must be in range "1..53"' + error_msg: 'sql parser error: Unexpected 0 at line:1, column:17: Precision must be in range "1..54"' - input: SELECT 1::float(54) - error_msg: 'sql parser error: Unexpected ( at line:1, column:16: Precision must be in range "1..53"' + error_msg: 'sql parser error: Unexpected 54 at line:1, column:18: Precision must be in range "1..54"' - input: SELECT 1::int(2) error_msg: |- sql parser error: Expected end of statement, found: ( at line:1, column:14 From 0e239cc4813ba5993ce3f4e8fa911cf1ecdbc46e Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 21:44:20 +0800 Subject: [PATCH 30/39] refine error message Signed-off-by: TennyZhuang --- src/sqlparser/src/parser.rs | 4 ++-- src/sqlparser/tests/testdata/array.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index 106a93e1dce9..22b57f1c76a5 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -201,12 +201,12 @@ impl Parser { let msg = if let Some(e) = e.into_inner() && let Some(cause) = e.cause() { - cause.to_string() + format!(": {}", cause) } else { "".to_string() }; ParserError::ParserError(format!( - "Unexpected {}: {}", + "Unexpected {}{}", self.tokens[self.index + token_stream.location()], msg )) diff --git a/src/sqlparser/tests/testdata/array.yaml b/src/sqlparser/tests/testdata/array.yaml index 28e2618ba732..24d1ff803165 100644 --- a/src/sqlparser/tests/testdata/array.yaml +++ b/src/sqlparser/tests/testdata/array.yaml @@ -6,9 +6,9 @@ - input: CREATE TABLE t(a int[][][]); formatted_sql: CREATE TABLE t (a INT[][][]) - input: CREATE TABLE t(a int[); - error_msg: 'sql parser error: Unexpected ) at line:1, column:23: ' + error_msg: 'sql parser error: Unexpected ) at line:1, column:23' - input: CREATE TABLE t(a int[[]); - error_msg: 'sql parser error: Unexpected [ at line:1, column:23: ' + error_msg: 'sql parser error: Unexpected [ at line:1, column:23' - input: CREATE TABLE t(a int]); error_msg: |- sql parser error: Expected ',' or ')' after column definition, found: ] at line:1, column:22 From 9ba9e970635a4dba9010c55016d70a240ec89f28 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 22:36:16 +0800 Subject: [PATCH 31/39] fix stateful parsing Signed-off-by: TennyZhuang --- src/sqlparser/src/parser.rs | 6 +- src/sqlparser/src/parser_v2/data_type.rs | 111 +++++++++++++++-------- src/sqlparser/src/parser_v2/mod.rs | 10 +- src/sqlparser/tests/testdata/create.yaml | 1 - src/sqlparser/tests/testdata/struct.yaml | 4 + 5 files changed, 86 insertions(+), 46 deletions(-) diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index 22b57f1c76a5..b70fc1b627ab 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -207,7 +207,11 @@ impl Parser { }; ParserError::ParserError(format!( "Unexpected {}{}", - self.tokens[self.index + token_stream.location()], + if self.index + token_stream.location() >= self.tokens.len() { + &"EOF" as &dyn std::fmt::Display + } else { + &self.tokens[self.index + token_stream.location()] as &dyn std::fmt::Display + }, msg )) }); diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index e9211cb08bb0..2c20ca90eced 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -20,6 +20,7 @@ use std::rc::Rc; use winnow::combinator::{ alt, cut_err, delimited, dispatch, empty, fail, opt, preceded, repeat, separated, seq, + terminated, trace, }; use winnow::error::StrContext; use winnow::{PResult, Parser, Stateful}; @@ -51,36 +52,49 @@ where let remaining_close2 = input.state.remaining_close.clone(); // Consume an abstract `>`, it may be the `remaining_close1` flag set by previous `>>`. - let consume_close = alt(( - move |input: &mut StatefulStream| -> PResult<()> { - if *remaining_close1.borrow() { - Ok(()) - } else { - fail(input) - } - } - .void(), - ( - Token::ShiftRight, - move |_input: &mut StatefulStream| -> PResult<()> { - *remaining_close2.borrow_mut() = true; - Ok(()) - }, - ) + let consume_close = trace( + "consume_struct_close", + alt(( + trace( + "consume_remaining_close", + move |input: &mut StatefulStream| -> PResult<()> { + if *remaining_close1.borrow() { + *remaining_close1.borrow_mut() = false; + Ok(()) + } else { + fail(input) + } + }, + ) .void(), - Token::Gt.void(), - )); + trace( + "produce_remaining_close", + ( + Token::ShiftRight, + move |_input: &mut StatefulStream| -> PResult<()> { + *remaining_close2.borrow_mut() = true; + Ok(()) + }, + ) + .void(), + ), + Token::Gt.void(), + )), + ); delimited( Token::Lt, separated( 1.., - seq! { - StructField { - name: identifier_non_reserved, - data_type: data_type_stateful, - } - }, + trace( + "struct_field", + seq! { + StructField { + name: identifier_non_reserved, + data_type: data_type_stateful, + } + }, + ), Token::Comma, ), consume_close, @@ -96,9 +110,22 @@ pub fn data_type(input: &mut S) -> PResult where S: TokenStream, { - with_state::(data_type_stateful) - .context(StrContext::Label("data_type")) - .parse_next(input) + with_state::(terminated( + data_type_stateful, + cut_err(trace( + "data_type_verify_state", + |input: &mut StatefulStream| { + // If there is remaining `>`, we should fail. + if *input.state.remaining_close.borrow() { + fail(input) + } else { + Ok(()) + } + }, + )), + )) + .context(StrContext::Label("data_type")) + .parse_next(input) } /// Data type parsing with stateful stream. @@ -169,18 +196,22 @@ where _ => fail, }; - alt(( - keywords, - object_name.map(|name| { - if name.to_string().eq_ignore_ascii_case("jsonb") { - // JSONB is not a keyword - DataType::Jsonb - } else { - DataType::Custom(name) - } - }), - )) - .context(StrContext::Label("non_keyword_data_type")) - .context(StrContext::Label("data_type_inner")) + trace( + "data_type_inner", + alt(( + keywords, + trace( + "non_keyword_data_type", + object_name.map(|name| { + if name.to_string().eq_ignore_ascii_case("jsonb") { + // JSONB is not a keyword + DataType::Jsonb + } else { + DataType::Custom(name) + } + }), + ), + )), + ) .parse_next(input) } diff --git a/src/sqlparser/src/parser_v2/mod.rs b/src/sqlparser/src/parser_v2/mod.rs index 0e52bad14873..b794fb6d34a2 100644 --- a/src/sqlparser/src/parser_v2/mod.rs +++ b/src/sqlparser/src/parser_v2/mod.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use winnow::combinator::{preceded, separated}; +use winnow::combinator::{preceded, separated, trace}; use winnow::error::{ContextError, StrContext}; use winnow::stream::{ContainsToken, Location, Stream, StreamIsPartial}; use winnow::token::{any, take_while}; @@ -88,9 +88,11 @@ where I: TokenStream, { fn parse_next(&mut self, input: &mut I) -> PResult { - token - .verify(move |t: &TokenWithLocation| t.token == *self) - .parse_next(input) + trace( + format!("token {}", self), + token.verify(move |t: &TokenWithLocation| t.token == *self), + ) + .parse_next(input) } } diff --git a/src/sqlparser/tests/testdata/create.yaml b/src/sqlparser/tests/testdata/create.yaml index 5e5a668fb70a..98d5f215da3b 100644 --- a/src/sqlparser/tests/testdata/create.yaml +++ b/src/sqlparser/tests/testdata/create.yaml @@ -45,7 +45,6 @@ - input: CREATE TABLE T (v1 INT, v2 STRUCT) formatted_sql: CREATE TABLE T (v1 INT, v2 STRUCT) - input: CREATE TABLE T (v1 INT, v2 STRUCT>) - formatted_sql: CREATE TABLE T (v1 INT, v2 STRUCT>) - input: CREATE TABLE T (a STRUCT) formatted_sql: CREATE TABLE T (a STRUCT) - input: CREATE TABLE T (FULL INT) diff --git a/src/sqlparser/tests/testdata/struct.yaml b/src/sqlparser/tests/testdata/struct.yaml index 4898714fc585..4446ce8c1de4 100644 --- a/src/sqlparser/tests/testdata/struct.yaml +++ b/src/sqlparser/tests/testdata/struct.yaml @@ -3,3 +3,7 @@ formatted_sql: SELECT CAST(ROW(1 * 2, 1.0) AS foo) - input: SELECT ROW(1 * 2, 1.0)::foo; formatted_sql: SELECT CAST(ROW(1 * 2, 1.0) AS foo) +- input: SELECT NULL::STRUCT + formatted_sql: SELECT CAST(NULL AS STRUCT) +- input: SELECT NULL::STRUCT> + error_msg: 'sql parser error: Unexpected EOF' From 3cf20d1b428d199bff33c7828738e4e659762319 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 22:44:58 +0800 Subject: [PATCH 32/39] refine error Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 12 ++++++++++-- src/sqlparser/tests/testdata/struct.yaml | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 2c20ca90eced..78c44eed5d9f 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -22,7 +22,7 @@ use winnow::combinator::{ alt, cut_err, delimited, dispatch, empty, fail, opt, preceded, repeat, separated, seq, terminated, trace, }; -use winnow::error::StrContext; +use winnow::error::{ContextError, ErrMode, ErrorKind, FromExternalError, StrContext}; use winnow::{PResult, Parser, Stateful}; use super::{ @@ -110,6 +110,10 @@ pub fn data_type(input: &mut S) -> PResult where S: TokenStream, { + #[derive(Debug, thiserror::Error)] + #[error("Unconsumed `>>`")] + struct UnconsumedShiftRight; + with_state::(terminated( data_type_stateful, cut_err(trace( @@ -117,7 +121,11 @@ where |input: &mut StatefulStream| { // If there is remaining `>`, we should fail. if *input.state.remaining_close.borrow() { - fail(input) + Err(ErrMode::Cut(ContextError::from_external_error( + input, + ErrorKind::Fail, + UnconsumedShiftRight, + ))) } else { Ok(()) } diff --git a/src/sqlparser/tests/testdata/struct.yaml b/src/sqlparser/tests/testdata/struct.yaml index 4446ce8c1de4..0e0e5574b615 100644 --- a/src/sqlparser/tests/testdata/struct.yaml +++ b/src/sqlparser/tests/testdata/struct.yaml @@ -6,4 +6,4 @@ - input: SELECT NULL::STRUCT formatted_sql: SELECT CAST(NULL AS STRUCT) - input: SELECT NULL::STRUCT> - error_msg: 'sql parser error: Unexpected EOF' + error_msg: 'sql parser error: Unexpected EOF: Unconsumed `>>`' From 88562ff55b42a447e853a0eab386ccde897e1da2 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 23 May 2024 23:00:57 +0800 Subject: [PATCH 33/39] fix ut Signed-off-by: TennyZhuang --- src/sqlparser/tests/testdata/create.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sqlparser/tests/testdata/create.yaml b/src/sqlparser/tests/testdata/create.yaml index 98d5f215da3b..5e5a668fb70a 100644 --- a/src/sqlparser/tests/testdata/create.yaml +++ b/src/sqlparser/tests/testdata/create.yaml @@ -45,6 +45,7 @@ - input: CREATE TABLE T (v1 INT, v2 STRUCT) formatted_sql: CREATE TABLE T (v1 INT, v2 STRUCT) - input: CREATE TABLE T (v1 INT, v2 STRUCT>) + formatted_sql: CREATE TABLE T (v1 INT, v2 STRUCT>) - input: CREATE TABLE T (a STRUCT) formatted_sql: CREATE TABLE T (a STRUCT) - input: CREATE TABLE T (FULL INT) From 8452dc7b276e95b695cdeffeed6ea201e65ce7d8 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Fri, 24 May 2024 14:16:15 +0800 Subject: [PATCH 34/39] fix struct sep parsing Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 17 +++++++++++++---- src/sqlparser/tests/testdata/struct.yaml | 2 ++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 78c44eed5d9f..3326e0c9c75d 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -82,9 +82,18 @@ where )), ); + // If there is an `over-consumed' `>`, we shouldn't handle `,`. + let sep = |input: &mut StatefulStream| -> PResult<()> { + if *input.state.remaining_close.borrow() { + fail(input) + } else { + Token::Comma.void().parse_next(input) + } + }; + delimited( Token::Lt, - separated( + cut_err(separated( 1.., trace( "struct_field", @@ -95,9 +104,9 @@ where } }, ), - Token::Comma, - ), - consume_close, + sep, + )), + cut_err(consume_close), ) .context(StrContext::Label("struct_data_type")) .parse_next(input) diff --git a/src/sqlparser/tests/testdata/struct.yaml b/src/sqlparser/tests/testdata/struct.yaml index 0e0e5574b615..92b7e533b242 100644 --- a/src/sqlparser/tests/testdata/struct.yaml +++ b/src/sqlparser/tests/testdata/struct.yaml @@ -5,5 +5,7 @@ formatted_sql: SELECT CAST(ROW(1 * 2, 1.0) AS foo) - input: SELECT NULL::STRUCT formatted_sql: SELECT CAST(NULL AS STRUCT) +- input: create table st (v1 int, v2 struct>, v3 struct>) + formatted_sql: CREATE TABLE st (v1 INT, v2 STRUCT>, v3 STRUCT>) - input: SELECT NULL::STRUCT> error_msg: 'sql parser error: Unexpected EOF: Unconsumed `>>`' From f9d93c7523f9851159dd061eb13635e9692e5629 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Fri, 24 May 2024 14:20:42 +0800 Subject: [PATCH 35/39] Update src/sqlparser/src/parser_v2/number.rs Co-authored-by: Bugen Zhao --- src/sqlparser/src/parser_v2/number.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlparser/src/parser_v2/number.rs b/src/sqlparser/src/parser_v2/number.rs index c9c1722da1e6..212c09e04e35 100644 --- a/src/sqlparser/src/parser_v2/number.rs +++ b/src/sqlparser/src/parser_v2/number.rs @@ -53,7 +53,7 @@ where S: TokenStream, { #[derive(Debug, thiserror::Error)] - #[error("Precision must be in range {0:?}")] + #[error("Precision must be in range '{0}'")] struct OutOfRange(String); delimited( From 162a6633b597b1e118e6544d049337dddd7c6716 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Fri, 24 May 2024 14:31:30 +0800 Subject: [PATCH 36/39] address comments Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 29 +++++++++++------------- src/sqlparser/src/parser_v2/impl_.rs | 1 + src/sqlparser/src/parser_v2/mod.rs | 14 ++++-------- 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 3326e0c9c75d..0167280c83e9 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -125,21 +125,18 @@ where with_state::(terminated( data_type_stateful, - cut_err(trace( - "data_type_verify_state", - |input: &mut StatefulStream| { - // If there is remaining `>`, we should fail. - if *input.state.remaining_close.borrow() { - Err(ErrMode::Cut(ContextError::from_external_error( - input, - ErrorKind::Fail, - UnconsumedShiftRight, - ))) - } else { - Ok(()) - } - }, - )), + trace("data_type_verify_state", |input: &mut StatefulStream| { + // If there is remaining `>`, we should fail. + if *input.state.remaining_close.borrow() { + Err(ErrMode::Cut(ContextError::from_external_error( + input, + ErrorKind::Fail, + UnconsumedShiftRight, + ))) + } else { + Ok(()) + } + }), )) .context(StrContext::Label("data_type")) .parse_next(input) @@ -210,7 +207,7 @@ where Keyword::NUMERIC | Keyword::DECIMAL | Keyword::DEC => cut_err(precision_and_scale()).map(|(precision, scale)| { DataType::Decimal(precision, scale) }), - _ => fail, + _ => cut_err(fail), }; trace( diff --git a/src/sqlparser/src/parser_v2/impl_.rs b/src/sqlparser/src/parser_v2/impl_.rs index 8e6498d5fde5..fc09a02b7b59 100644 --- a/src/sqlparser/src/parser_v2/impl_.rs +++ b/src/sqlparser/src/parser_v2/impl_.rs @@ -138,6 +138,7 @@ impl<'a> Stream for TokenStreamWrapper<'a> { #[inline(always)] fn raw(&self) -> &dyn std::fmt::Debug { + // We customized the `Debug` implementation in the wrapper, so don't return `self.tokens` here. self } } diff --git a/src/sqlparser/src/parser_v2/mod.rs b/src/sqlparser/src/parser_v2/mod.rs index b794fb6d34a2..d6ea8bf06563 100644 --- a/src/sqlparser/src/parser_v2/mod.rs +++ b/src/sqlparser/src/parser_v2/mod.rs @@ -56,15 +56,11 @@ fn token(input: &mut S) -> PResult where S: TokenStream, { - struct WhiteSpace; - - impl ContainsToken for WhiteSpace { - fn contains_token(&self, token: TokenWithLocation) -> bool { - matches!(token.token, Token::Whitespace(_)) - } - } - - preceded(take_while(0.., WhiteSpace), any_token).parse_next(input) + preceded( + take_while(0.., |token| matches!(token.token, Token::Whitespace(_))), + any_token, + ) + .parse_next(input) } /// Consume a keyword. From cc9dbc3db4e8b4aea0dcd56ee1a5bbcbb3dc57ed Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Fri, 24 May 2024 15:39:57 +0800 Subject: [PATCH 37/39] fix warning Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlparser/src/parser_v2/mod.rs b/src/sqlparser/src/parser_v2/mod.rs index d6ea8bf06563..97e4d97a1c06 100644 --- a/src/sqlparser/src/parser_v2/mod.rs +++ b/src/sqlparser/src/parser_v2/mod.rs @@ -12,7 +12,7 @@ use winnow::combinator::{preceded, separated, trace}; use winnow::error::{ContextError, StrContext}; -use winnow::stream::{ContainsToken, Location, Stream, StreamIsPartial}; +use winnow::stream::{Location, Stream, StreamIsPartial}; use winnow::token::{any, take_while}; use winnow::{PResult, Parser, Stateful}; From d8b7d60efe13e7806019c0d08b1b084215d3a51a Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Fri, 24 May 2024 15:46:32 +0800 Subject: [PATCH 38/39] fix Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/mod.rs | 4 +++- src/sqlparser/src/parser_v2/number.rs | 2 +- src/sqlparser/tests/testdata/select.yaml | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/sqlparser/src/parser_v2/mod.rs b/src/sqlparser/src/parser_v2/mod.rs index 97e4d97a1c06..03ff0af218f3 100644 --- a/src/sqlparser/src/parser_v2/mod.rs +++ b/src/sqlparser/src/parser_v2/mod.rs @@ -57,7 +57,9 @@ where S: TokenStream, { preceded( - take_while(0.., |token| matches!(token.token, Token::Whitespace(_))), + take_while(0.., |token: TokenWithLocation| { + matches!(token.token, Token::Whitespace(_)) + }), any_token, ) .parse_next(input) diff --git a/src/sqlparser/src/parser_v2/number.rs b/src/sqlparser/src/parser_v2/number.rs index 212c09e04e35..e466d9f3019c 100644 --- a/src/sqlparser/src/parser_v2/number.rs +++ b/src/sqlparser/src/parser_v2/number.rs @@ -53,7 +53,7 @@ where S: TokenStream, { #[derive(Debug, thiserror::Error)] - #[error("Precision must be in range '{0}'")] + #[error("Precision must be in range {0}")] struct OutOfRange(String); delimited( diff --git a/src/sqlparser/tests/testdata/select.yaml b/src/sqlparser/tests/testdata/select.yaml index a8cf8f612240..c2edc6391367 100644 --- a/src/sqlparser/tests/testdata/select.yaml +++ b/src/sqlparser/tests/testdata/select.yaml @@ -120,9 +120,9 @@ - input: SELECT 0x error_msg: 'sql parser error: incomplete integer literal at Line: 1, Column 8' - input: SELECT 1::float(0) - error_msg: 'sql parser error: Unexpected 0 at line:1, column:17: Precision must be in range "1..54"' + error_msg: 'sql parser error: Unexpected 0 at line:1, column:17: Precision must be in range 1..54' - input: SELECT 1::float(54) - error_msg: 'sql parser error: Unexpected 54 at line:1, column:18: Precision must be in range "1..54"' + error_msg: 'sql parser error: Unexpected 54 at line:1, column:18: Precision must be in range 1..54' - input: SELECT 1::int(2) error_msg: |- sql parser error: Expected end of statement, found: ( at line:1, column:14 From b7d79e10db76874f463fc362a85407c5d61738c5 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Fri, 24 May 2024 18:15:16 +0800 Subject: [PATCH 39/39] revert a behavior Signed-off-by: TennyZhuang --- src/sqlparser/src/parser_v2/data_type.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 0167280c83e9..a8e9ea20f3ba 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -207,7 +207,7 @@ where Keyword::NUMERIC | Keyword::DECIMAL | Keyword::DEC => cut_err(precision_and_scale()).map(|(precision, scale)| { DataType::Decimal(precision, scale) }), - _ => cut_err(fail), + _ => fail, }; trace(