diff --git a/Cargo.lock b/Cargo.lock index 0b9e2e042..4d6a0b075 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4122,7 +4122,7 @@ dependencies = [ [[package]] name = "polars" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "getrandom", "polars-arrow", @@ -4141,7 +4141,7 @@ dependencies = [ [[package]] name = "polars-arrow" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "ahash", "atoi", @@ -4188,7 +4188,7 @@ dependencies = [ [[package]] name = "polars-compute" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "bytemuck", "either", @@ -4203,7 +4203,7 @@ dependencies = [ [[package]] name = "polars-core" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "ahash", "bitflags 2.6.0", @@ -4227,7 +4227,6 @@ dependencies = [ "regex", "serde", "serde_json", - "smartstring", "thiserror", "version_check", "xxhash-rust", @@ -4236,7 +4235,7 @@ dependencies = [ [[package]] name = "polars-error" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "avro-schema", "object_store", @@ -4249,7 +4248,7 @@ dependencies = [ [[package]] name = "polars-expr" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "ahash", "bitflags 2.6.0", @@ -4262,13 +4261,12 @@ dependencies = [ "polars-time", "polars-utils", "rayon", - "smartstring", ] [[package]] name = "polars-io" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "ahash", "async-trait", @@ -4305,7 +4303,6 @@ dependencies = [ "serde_json", "simd-json", "simdutf8", - "smartstring", "tokio", "tokio-util", "url", @@ -4314,7 +4311,7 @@ dependencies = [ [[package]] name = "polars-json" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "ahash", "chrono", @@ -4335,7 +4332,7 @@ dependencies = [ [[package]] name = "polars-lazy" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "ahash", "bitflags 2.6.0", @@ -4354,7 +4351,6 @@ dependencies = [ "polars-time", "polars-utils", "rayon", - "smartstring", "tokio", "version_check", ] @@ -4362,7 +4358,7 @@ dependencies = [ [[package]] name = "polars-mem-engine" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "futures", "memmap2", @@ -4383,7 +4379,7 @@ dependencies = [ [[package]] name = "polars-ops" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "ahash", "argminmax", @@ -4408,7 +4404,6 @@ dependencies = [ "regex", "serde", "serde_json", - "smartstring", "unicode-reverse", "version_check", ] @@ -4416,7 +4411,7 @@ dependencies = [ [[package]] name = "polars-parquet" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "ahash", "async-stream", @@ -4443,7 +4438,7 @@ dependencies = [ [[package]] name = "polars-pipe" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -4461,7 +4456,6 @@ dependencies = [ "polars-row", "polars-utils", "rayon", - "smartstring", "tokio", "uuid", "version_check", @@ -4470,7 +4464,7 @@ dependencies = [ [[package]] name = "polars-plan" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "ahash", "bitflags 2.6.0", @@ -4495,7 +4489,6 @@ dependencies = [ "recursive", "regex", "serde", - "smartstring", "strum_macros", "version_check", ] @@ -4503,7 +4496,7 @@ dependencies = [ [[package]] name = "polars-row" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "bytemuck", "polars-arrow", @@ -4514,7 +4507,7 @@ dependencies = [ [[package]] name = "polars-sql" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "hex", "once_cell", @@ -4525,6 +4518,7 @@ dependencies = [ "polars-ops", "polars-plan", "polars-time", + "polars-utils", "rand", "serde", "serde_json", @@ -4534,7 +4528,7 @@ dependencies = [ [[package]] name = "polars-time" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "atoi", "bytemuck", @@ -4549,13 +4543,12 @@ dependencies = [ "polars-utils", "regex", "serde", - "smartstring", ] [[package]] name = "polars-utils" version = "0.42.0" -source = "git+https://github.com/pola-rs/polars?tag=py-1.6.0#6ff1c70d25f108306ea53138b11eb262288b7251" +source = "git+https://github.com/pola-rs/polars?rev=915f164#915f1648a9e2f461fdcd497db8c588c0419dfa35" dependencies = [ "ahash", "bytemuck", @@ -4569,7 +4562,7 @@ dependencies = [ "polars-error", "raw-cpuid", "rayon", - "smartstring", + "serde", "stacker", "sysinfo", "version_check", @@ -4857,6 +4850,7 @@ dependencies = [ "parking_lot 0.12.3", "phf 0.11.2", "polars", + "polars-utils", "postgres", "pyo3", "qsv-dateparser", @@ -4886,7 +4880,6 @@ dependencies = [ "serial_test", "simdutf8", "simple-expand-tilde", - "smartstring", "snap", "strsim", "strum", @@ -5948,18 +5941,6 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" -[[package]] -name = "smartstring" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" -dependencies = [ - "autocfg", - "serde", - "static_assertions", - "version_check", -] - [[package]] name = "snafu" version = "0.7.5" diff --git a/Cargo.toml b/Cargo.toml index faa880dea..737d8f11b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -166,6 +166,7 @@ polars = { version = "0.42", features = [ "streaming", "timezones", ], optional = true } +polars-utils = { version = "0.42.0", default-features = false, optional = true} pyo3 = { version = "0.22", features = [ "auto-initialize", "gil-refs", @@ -214,7 +215,6 @@ serde_json = { version = "1", features = ["preserve_order"] } serde_stacker = { version = "0.1", optional = true } serde_urlencoded = { version = "0.7", optional = true } simple-expand-tilde = { version = "0.4.0", optional = true } -smartstring = { version = "1", optional = true } snap = "1" strsim = { version = "0.11", optional = true } strum = { version = "0.26", features = ["phf"] } @@ -285,10 +285,10 @@ local-encoding = { git = "https://github.com/slonopotamus/local-encoding-rs", br # BUILD NOTE: Be sure to set QSV_POLARS_REV below to the latest commit short hash or tag # of polars/py-polars before building qsv. This allows us to show the polars rev/tag in --version. # if we are using a release version of Rust Polars, leave QSV_POLARS_REV empty -# QSV_POLARS_REV=py-1.6.0 -polars = { git = "https://github.com/pola-rs/polars", tag = "py-1.6.0" } -# polars = { git = "https://github.com/pola-rs/polars", rev = "f0ba999" } - +# QSV_POLARS_REV=915f164 +# polars = { git = "https://github.com/pola-rs/polars", tag = "py-1.6.0" } +polars = { git = "https://github.com/pola-rs/polars", rev = "915f164" } +polars-utils = { git = "https://github.com/pola-rs/polars", rev = "915f164" } [features] default = ["mimalloc"] @@ -345,7 +345,7 @@ python = ["pyo3"] to = ["csvs_convert"] lite = [] datapusher_plus = ["self_update"] -polars = ["dep:polars", "smartstring"] +polars = ["dep:polars", "polars-utils"] feature_capable = [] nightly = [ "rand/nightly", diff --git a/src/cmd/count.rs b/src/cmd/count.rs index f5f877df1..23ac860aa 100644 --- a/src/cmd/count.rs +++ b/src/cmd/count.rs @@ -208,7 +208,7 @@ pub fn polars_count_input( let mut comment_char = String::new(); let comment_prefix = if let Some(c) = conf.comment { comment_char.push(c as char); - Some(comment_char.as_str()) + Some(PlSmallStr::from_str(comment_char.as_str())) } else { None }; diff --git a/src/cmd/joinp.rs b/src/cmd/joinp.rs index 778be0d5e..4f9d4b114 100644 --- a/src/cmd/joinp.rs +++ b/src/cmd/joinp.rs @@ -193,7 +193,6 @@ use std::{ use polars::{datatypes::AnyValue, prelude::*, sql::SQLContext}; use serde::Deserialize; -use smartstring::SmartString; use tempfile::tempdir; use crate::{cmd::sqlp::compress_output_if_needed, config::Delimiter, util, CliResult}; @@ -335,7 +334,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> { args.flag_left_by .unwrap() .split(',') - .map(smartstring::SmartString::from) + .map(PlSmallStr::from_str) .collect(), ); } @@ -344,7 +343,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> { args.flag_right_by .unwrap() .split(',') - .map(smartstring::SmartString::from) + .map(PlSmallStr::from_str) .collect(), ); } @@ -448,19 +447,22 @@ impl JoinStruct { } else { if asof_join { // sort by the asof columns, as asof joins require sorted join column data - let left_selcols_smartsvec: Vec> = - self.left_sel.split(',').map(SmartString::from).collect(); + let left_selcols_vec: Vec = + self.left_sel.split(',').map(PlSmallStr::from_str).collect(); self.left_lf = self .left_lf - .sort(&left_selcols_smartsvec, SortMultipleOptions::default()); + .sort(&left_selcols_vec, SortMultipleOptions::default()); - let right_selcols_smartsvec: Vec> = - self.right_sel.split(',').map(SmartString::from).collect(); + let right_selcols_vec: Vec = self + .right_sel + .split(',') + .map(PlSmallStr::from_str) + .collect(); self.right_lf = self .right_lf - .sort(&right_selcols_smartsvec, SortMultipleOptions::default()); + .sort(&right_selcols_vec, SortMultipleOptions::default()); } self.left_lf @@ -534,7 +536,7 @@ impl Args { }; let comment_char = if let Ok(comment_char) = env::var("QSV_COMMENT_CHAR") { - Some(comment_char) + Some(PlSmallStr::from_string(comment_char)) } else { None }; @@ -567,7 +569,7 @@ impl Args { LazyCsvReader::new(&self.arg_input1) .with_has_header(true) .with_missing_is_null(self.flag_nulls) - .with_comment_prefix(comment_char.as_deref()) + .with_comment_prefix(comment_char.clone()) .with_separator(tsvssv_delim(&self.arg_input1, delim)) .with_infer_schema_length(num_rows) .with_try_parse_dates(try_parsedates) @@ -593,7 +595,7 @@ impl Args { LazyCsvReader::new(&self.arg_input2) .with_has_header(true) .with_missing_is_null(self.flag_nulls) - .with_comment_prefix(comment_char.as_deref()) + .with_comment_prefix(comment_char) .with_separator(tsvssv_delim(&self.arg_input2, delim)) .with_infer_schema_length(num_rows) .with_try_parse_dates(try_parsedates) diff --git a/src/cmd/sqlp.rs b/src/cmd/sqlp.rs index 502125e52..0f0370320 100644 --- a/src/cmd/sqlp.rs +++ b/src/cmd/sqlp.rs @@ -270,6 +270,7 @@ use polars::{ }, sql::SQLContext, }; +use polars_utils::pl_str::PlSmallStr; use regex::Regex; use serde::Deserialize; @@ -561,15 +562,15 @@ pub fn run(argv: &[&str]) -> CliResult<()> { }; let rnull_values = if args.flag_rnull_values == "" { - vec![String::new()] + vec![PlSmallStr::const_default()] } else { args.flag_rnull_values .split(',') .map(|value| { if value == "" { - String::new() + PlSmallStr::const_default() } else { - value.to_string() + PlSmallStr::from_str(value) } }) .collect() @@ -591,7 +592,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> { }; let comment_char = if let Ok(comment_char) = env::var("QSV_COMMENT_CHAR") { - Some(comment_char) + Some(PlSmallStr::from_string(comment_char)) } else { None }; @@ -737,7 +738,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> { let lf = LazyCsvReader::new(table) .with_has_header(true) .with_missing_is_null(true) - .with_comment_prefix(comment_char.as_deref()) + .with_comment_prefix(comment_char.clone()) .with_null_values(Some(NullValues::AllColumns(rnull_values.clone()))) .with_separator(tsvssv_delim(table, delim)) .with_infer_schema_length(Some(args.flag_infer_len)) diff --git a/tests/test_sqlp.rs b/tests/test_sqlp.rs index ce63e4c0e..138e06f59 100644 --- a/tests/test_sqlp.rs +++ b/tests/test_sqlp.rs @@ -1625,11 +1625,11 @@ fn sqlp_div_sign() { let got: Vec> = wrk.read_stdout(&mut cmd); let expected = vec![ svec!["a_div_b", "a_floordiv_b", "b_sign"], - svec!["-0.09950248756218906", "-1", "-1"], - svec!["2.857142857142857", "2", "1"], - svec!["12.0", "12", "1"], + svec!["-0.09950248756218906", "-1", "-1.0"], + svec!["2.857142857142857", "2", "1.0"], + svec!["12.0", "12", "1.0"], svec!["", "", ""], - svec!["-15.92356687898089", "-16", "-1"], + svec!["-15.92356687898089", "-16", "-1.0"], ]; assert_eq!(got, expected); @@ -2748,3 +2748,52 @@ IT Rome,Milan,Turin,Naples,Venice assert_eq!(got, expected); } + +// #[test] +// fn sqlp_generate_graphviz_plan() { +// let wrk = Workdir::new("sqlp_generate_graphviz_plan"); + +// wrk.create( +// "data.csv", +// vec![ +// svec!["a", "b", "c"], +// svec!["1", "2", "3"], +// svec!["4", "5", "6"], +// svec!["7", "8", "9"], +// ], +// ); + +// let output_dotfile = wrk.path("output.dot").to_string_lossy().to_string(); +// std::env::set_var("POLARS_VISUALIZE_PHYSICAL_PLAN", output_dotfile.as_str()); + +// let mut cmd = wrk.command("sqlp"); +// cmd.arg("data.csv").arg( +// r#" +// SELECT a, b, c +// FROM data +// WHERE a > 2 +// ORDER BY a DESC +// "#, +// ).arg("--streaming"); + +// wrk.assert_success(&mut cmd); + +// // assert!(std::path::Path::new(&output_dotfile).exists()); + +// let got: Vec> = wrk.read_stdout(&mut cmd); +// let expected = vec![ +// svec!["a", "b", "c"], +// svec!["7", "8", "9"], +// svec!["4", "5", "6"], +// ]; + +// assert_eq!(got, expected); + +// let got_dot = wrk.read_to_string(&output_dotfile); +// let expected_dot = r#"digraph { +// "Projection" -> "Filter"; +// "Filter" -> "CsvScan"; +// }"#; + +// assert_eq!(got_dot, expected_dot); +// }