Skip to content

Commit

Permalink
Merge pull request #1926 from jqnatividad/moar_sqlp_functions
Browse files Browse the repository at this point in the history
Moar polars power!
  • Loading branch information
jqnatividad authored Jun 28, 2024
2 parents 8626b43 + 7fa475f commit 78fd47d
Show file tree
Hide file tree
Showing 5 changed files with 299 additions and 48 deletions.
60 changes: 21 additions & 39 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ polars = { version = "0.41", features = [
"coalesce",
"cse",
"cross_join",
"dtype-categorical",
"dtype-full",
"extract_jsonpath",
"ipc",
Expand All @@ -158,9 +159,9 @@ polars = { version = "0.41", features = [
"serde-lazy",
"sql",
"streaming",
"temporal",
"timezones",
], optional = true }
polars-ops = { version = "0.41", optional = true }
pyo3 = { version = "0.22", features = [
"auto-initialize",
"gil-refs",
Expand Down Expand Up @@ -258,6 +259,9 @@ grex = { git = "https://github.com/pemistahl/grex", rev = "0c8ab87" }
calamine = { git = "https://github.com/tafia/calamine", rev = "6b41309" }
# use modernized version of local_encoding
local-encoding = { git = "https://github.com/slonopotamus/local-encoding-rs", branch = "travis-madness" }
# use latest upstream version of polars with additional unreleased features/fixes
polars = { git = "https://github.com/pola-rs/polars", rev = "bcc8a92" }


[features]
default = ["mimalloc"]
Expand Down Expand Up @@ -315,7 +319,7 @@ to = ["csvs_convert"]
to_parquet = ["csvs_convert/parquet"]
lite = []
datapusher_plus = ["self_update"]
polars = ["dep:polars", "polars-ops", "smartstring"]
polars = ["dep:polars", "smartstring"]
feature_capable = []
nightly = [
"regex/unstable",
Expand Down
5 changes: 2 additions & 3 deletions src/cmd/joinp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,11 @@ use std::{
use polars::{
datatypes::AnyValue,
prelude::{
AsOfOptions, AsofStrategy, CsvWriter, IntoLazy, JoinType, JoinValidation, LazyCsvReader,
LazyFileListReader, LazyFrame, SerWriter, SortMultipleOptions,
AsOfOptions, AsofStrategy, CsvWriter, IntoLazy, JoinCoalesce, JoinType, JoinValidation,
LazyCsvReader, LazyFileListReader, LazyFrame, SerWriter, SortMultipleOptions,
},
sql::SQLContext,
};
use polars_ops::frame::JoinCoalesce;
use serde::Deserialize;
use smartstring::SmartString;
use tempfile::tempdir;
Expand Down
17 changes: 16 additions & 1 deletion src/cmd/sqlp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Returns the shape of the query result (number of rows, number of columns) to std
Example queries:
qsv sqlp data.csv 'select * from data where col1 > 10 order by col2 desc limit 20'
qsv sqlp data.csv 'select * from data where col1 > 10 order by all desc limit 20'
qsv sqlp data.csv 'select col1, col2 as friendlyname from data' --format parquet --output data.parquet
Expand Down Expand Up @@ -71,6 +71,19 @@ Example queries:
# https://en.wikipedia.org/wiki/Three-way_comparison#Spaceship_operator
qsv sqlp data.csv data2.csv "select data.c2 <=> data2.c2 from data join data2 on data.c1 = data2.c1"
# support ^@ ("starts with"), ~~ (like), ~~* (ilike), !~~ (not like), and !~~* (not ilike) operators
qsv sqlp data.csv "select * from data WHERE col1 ^@ 'foo'"
qsv sqlp data.csv "select c1 ^@ 'a' AS c1_starts_with_a from data"
qsv sqlp data.csv "select c1 ~~* '%B' AS c1_ends_with_b_caseinsensitive from data"
# support SELECT * ILIKE wildcard syntax
# select all columns from customers whose column names contain 'a' followed by an 'e'
# with any characters (or no characters) in between, case-insensitive.
# if customers.csv has columns LastName, FirstName, Address, City, State, Zip
# this query will return, for all rows, only the columns whose names contain
# 'a' followed by an 'e' - i.e. all columns except City and Zip
qsv sqlp customers.csv "SELECT * ILIKE '%a%e%' FROM customers ORDER BY LastName, FirstName"
# regex operators: "~" (contains pattern, case-sensitive); "~*" (contains pattern, case-insensitive)
# "!~" (does not contain pattern, case-sensitive); "!~*" (does not contain pattern, case-insensitive)
qsv sqlp data.csv "select * from data WHERE col1 ~ '^foo' AND col2 > 10"
Expand All @@ -94,6 +107,8 @@ Example queries:
qsv sqlp data.csv "select * from data join read_parquet('data2.parquet') as t2 ON data.c1 = t2.c1"
qsv sqlp data.csv "select * from data join read_ndjson('data2.jsonl') as t2 on data.c1 = t2.c1"
qsv sqlp data.csv "select * from data join read_ipc('data2.arrow') as t2 ON data.c1 = t2.c1"
qsv sqlp SKIP_INPUT "select * from read_parquet('data.parquet') order by col1 desc limit 100"
qsv sqlp SKIP_INPUT "select * from read_ndjson('data.jsonl') as t1 join read_ipc('data.arrow') as t2 on t1.c1 = t2.c1"
# you can also directly load CSVs using the Polars read_csv() SQL function. This is useful when
# you want to bypass the regular CSV parser (with SKIP_INPUT) and use Polars' multithreaded,
Expand Down
Loading

0 comments on commit 78fd47d

Please sign in to comment.