From b8ee29b1bffb29ffa63d3a65dcc5c8ee711b1417 Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Wed, 25 Dec 2024 10:40:14 +0800 Subject: [PATCH] fix(query): uri path should not be percent-encoded (#17109) --- Cargo.lock | 2 +- src/query/ast/Cargo.toml | 1 + src/query/ast/src/ast/statements/copy.rs | 5 ++++- src/query/sql/Cargo.toml | 1 - src/query/sql/src/planner/binder/location.rs | 4 +--- .../00_stage/00_0005_copy_into_location.result | 4 ++++ .../1_stateful/00_stage/00_0005_copy_into_location.sh | 9 +++++++++ 7 files changed, 20 insertions(+), 6 deletions(-) create mode 100755 tests/suites/1_stateful/00_stage/00_0005_copy_into_location.result create mode 100755 tests/suites/1_stateful/00_stage/00_0005_copy_into_location.sh diff --git a/Cargo.lock b/Cargo.lock index 7f82bd53d877..fdcab12d8d7b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3002,6 +3002,7 @@ dependencies = [ "nom", "nom-rule", "ordered-float 4.5.0", + "percent-encoding", "pratt", "pretty", "pretty_assertions", @@ -4039,7 +4040,6 @@ dependencies = [ "num-traits", "opendal", "parking_lot 0.12.3", - "percent-encoding", "prqlc", "rand", "recursive", diff --git a/src/query/ast/Cargo.toml b/src/query/ast/Cargo.toml index 9fb154376fc3..8230b245d4a9 100644 --- a/src/query/ast/Cargo.toml +++ b/src/query/ast/Cargo.toml @@ -23,6 +23,7 @@ logos = { workspace = true } nom = { workspace = true } nom-rule = { workspace = true } ordered-float = { workspace = true } +percent-encoding = { workspace = true } pratt = { workspace = true } pretty = { workspace = true } pretty_assertions = { workspace = true } diff --git a/src/query/ast/src/ast/statements/copy.rs b/src/query/ast/src/ast/statements/copy.rs index c31ed3096c0e..8e10e3731827 100644 --- a/src/query/ast/src/ast/statements/copy.rs +++ b/src/query/ast/src/ast/statements/copy.rs @@ -22,6 +22,7 @@ use std::str::FromStr; use derive_visitor::Drive; use derive_visitor::DriveMut; use itertools::Itertools; +use percent_encoding::percent_decode_str; use url::Url; use crate::ast::quote::QuotedString; @@ -477,7 +478,9 @@ impl UriLocation { let path = if parsed.path().is_empty() { "/".to_string() } else { - parsed.path().to_string() + percent_decode_str(parsed.path()) + .decode_utf8_lossy() + .to_string() }; Ok(Self { diff --git a/src/query/sql/Cargo.toml b/src/query/sql/Cargo.toml index 1f017a82dac1..e0b1e7d9e1a4 100644 --- a/src/query/sql/Cargo.toml +++ b/src/query/sql/Cargo.toml @@ -64,7 +64,6 @@ num-derive = { workspace = true } num-traits = { workspace = true } opendal = { workspace = true } parking_lot = { workspace = true } -percent-encoding = { workspace = true } prqlc = { workspace = true } rand = { workspace = true } recursive = { workspace = true } diff --git a/src/query/sql/src/planner/binder/location.rs b/src/query/sql/src/planner/binder/location.rs index 0a4973fce1c4..5d74c26dcf4b 100644 --- a/src/query/sql/src/planner/binder/location.rs +++ b/src/query/sql/src/planner/binder/location.rs @@ -42,7 +42,6 @@ use databend_common_storage::STDIN_FD; use opendal::raw::normalize_path; use opendal::raw::normalize_root; use opendal::Scheme; -use percent_encoding::percent_decode_str; /// secure_omission will fix omitted endpoint url schemes into 'https://' #[inline] @@ -538,10 +537,9 @@ pub async fn parse_uri_location( Scheme::Cos => parse_cos_params(l, root)?, Scheme::Http => { // Make sure path has been percent decoded before parse pattern. - let path = percent_decode_str(&l.path).decode_utf8_lossy(); let cfg = StorageHttpConfig { endpoint_url: format!("{}://{}", l.protocol, l.name), - paths: globiter::Pattern::parse(&path) + paths: globiter::Pattern::parse(&l.path) .map_err(|err| { Error::new( ErrorKind::InvalidInput, diff --git a/tests/suites/1_stateful/00_stage/00_0005_copy_into_location.result b/tests/suites/1_stateful/00_stage/00_0005_copy_into_location.result new file mode 100755 index 000000000000..efa2237d17e2 --- /dev/null +++ b/tests/suites/1_stateful/00_stage/00_0005_copy_into_location.result @@ -0,0 +1,4 @@ +>>>> create or replace connection c_00_0005 storage_type='s3' access_key_id = 'minioadmin' endpoint_url = 'http://127.0.0.1:9900' secret_access_key = 'minioadmin' +>>>> copy into 's3://testbucket/c_00_0005/ab de/f' connection=(connection_name='c_00_0005') from (select 1) detailed_output=true use_raw_path=true single=true overwrite=true +c_00_0005/ab de/f 374 1 +<<<< diff --git a/tests/suites/1_stateful/00_stage/00_0005_copy_into_location.sh b/tests/suites/1_stateful/00_stage/00_0005_copy_into_location.sh new file mode 100755 index 000000000000..4e0ab99e5eb4 --- /dev/null +++ b/tests/suites/1_stateful/00_stage/00_0005_copy_into_location.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../../../shell_env.sh + + +stmt "create or replace connection c_00_0005 storage_type='s3' access_key_id = 'minioadmin' endpoint_url = 'http://127.0.0.1:9900' secret_access_key = 'minioadmin'" + +query "copy into 's3://testbucket/c_00_0005/ab de/f' connection=(connection_name='c_00_0005') from (select 1) detailed_output=true use_raw_path=true single=true overwrite=true"