From bf78f49b6b0c1d0efd504f8d7b38a01162d95ea4 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Thu, 7 Mar 2024 13:16:09 -0600 Subject: [PATCH] bump polars version (#177) * bump polars version * lint --- Cargo.toml | 26 +++++++++++--------- __tests__/dataframe.test.ts | 4 +-- __tests__/expr.test.ts | 10 ++++---- __tests__/series.test.ts | 8 +++--- polars/lazy/expr/index.ts | 11 ++++++++- polars/lazy/expr/list.ts | 15 ++++++++++-- polars/lazy/expr/string.ts | 27 ++++++++++++++------ polars/lazy/functions.ts | 18 ++++++++++---- polars/series/index.ts | 16 ++++++------ polars/series/list.ts | 5 ++-- polars/series/string.ts | 31 ++++++++++++++++------- polars/shared_traits.ts | 5 +++- src/conversion.rs | 8 +++--- src/dataframe.rs | 31 +++++++++++++---------- src/lazy/dataframe.rs | 30 ++++++++++++++--------- src/lazy/dsl.rs | 49 ++++++++++++------------------------- src/series.rs | 32 +++++------------------- 17 files changed, 178 insertions(+), 148 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e657841d2..e10910796 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,14 +15,17 @@ crate-type = ["cdylib", "lib"] [dependencies] ahash = "0.8.7" bincode = "1.3.3" -napi = {version = "2.14.2", default-features = false, features = ["napi8", "serde-json"]} -napi-derive = {version = "2.14.6", default-features = false} -polars-core = {git = "https://github.com/pola-rs/polars.git", rev = "fa59ffc1685043b44476dcb2a3f3804460ead5c5", default-features = false} -polars-io = {git = "https://github.com/pola-rs/polars.git", rev = "fa59ffc1685043b44476dcb2a3f3804460ead5c5", default-features = false} -polars-lazy = {git = "https://github.com/pola-rs/polars.git", rev = "fa59ffc1685043b44476dcb2a3f3804460ead5c5", default-features = false} +napi = { version = "2.14.2", default-features = false, features = [ + "napi8", + "serde-json", +] } +napi-derive = { version = "2.14.6", default-features = false } +polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "3cf4897e679b056d17a235d48867035265d43cdc", default-features = false } +polars-io = { git = "https://github.com/pola-rs/polars.git", rev = "3cf4897e679b056d17a235d48867035265d43cdc", default-features = false } +polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "3cf4897e679b056d17a235d48867035265d43cdc", default-features = false } thiserror = "1" -smartstring = {version = "1"} -serde_json = {version = "1"} +smartstring = { version = "1" } +serde_json = { version = "1" } either = "1.9" [dependencies.polars] @@ -54,7 +57,6 @@ features = [ "reinterpret", "mode", "extract_jsonpath", - "lazy_regex", "cum_agg", "rolling_window", "repeat_by", @@ -66,7 +68,6 @@ features = [ "pct_change", "moment", "diagonal_concat", - "horizontal_concat", "abs", "dot_diagram", "dataframe_arithmetic", @@ -89,10 +90,11 @@ features = [ "timezones", "peaks", "string_pad", - "cov" + "cov", + "group_by_list", ] git = "https://github.com/pola-rs/polars.git" -rev = "fa59ffc1685043b44476dcb2a3f3804460ead5c5" +rev = "3cf4897e679b056d17a235d48867035265d43cdc" [build-dependencies] napi-build = "2.1.0" @@ -103,4 +105,4 @@ lto = "fat" [features] default = ["range"] -range = ["polars-lazy/range"] \ No newline at end of file +range = ["polars-lazy/range"] diff --git a/__tests__/dataframe.test.ts b/__tests__/dataframe.test.ts index 550b6d542..78571773a 100644 --- a/__tests__/dataframe.test.ts +++ b/__tests__/dataframe.test.ts @@ -1582,13 +1582,13 @@ describe("io", () => { { name: "foo", datatype: "Float64", - bit_settings: "SORTED_ASC", + bit_settings: "", values: [1.0], }, { name: "bar", datatype: "String", - bit_settings: "SORTED_ASC", + bit_settings: "", values: ["a"], }, ], diff --git a/__tests__/expr.test.ts b/__tests__/expr.test.ts index 9528505a6..35feca101 100644 --- a/__tests__/expr.test.ts +++ b/__tests__/expr.test.ts @@ -104,9 +104,9 @@ describe("expr", () => { }); test.each` args | cumCount - ${undefined} | ${[0, 1, 2]} - ${true} | ${[2, 1, 0]} - ${{ reverse: true }} | ${[2, 1, 0]} + ${undefined} | ${[1, 2, 3]} + ${true} | ${[3, 2, 1]} + ${{ reverse: true }} | ${[3, 2, 1]} `("$# cumCount", ({ args, cumCount }) => { const df = pl.DataFrame({ a: [1, 2, 3] }); const expected = pl.DataFrame({ a: cumCount }); @@ -331,8 +331,8 @@ describe("expr", () => { }); test.each` args | hashValue - ${[0]} | ${6340063056640878722n} - ${[{ k0: 1n, k1: 1 }]} | ${9788354747012366704n} + ${[0]} | ${7355865757046787768n} + ${[{ k0: 1n, k1: 1 }]} | ${2179653058507248884n} `("$# hash", ({ args, hashValue }) => { const df = pl.DataFrame({ a: [1] }); const expected = pl.DataFrame({ hash: [hashValue] }); diff --git a/__tests__/series.test.ts b/__tests__/series.test.ts index 7ac45aad2..54f1def23 100644 --- a/__tests__/series.test.ts +++ b/__tests__/series.test.ts @@ -472,7 +472,7 @@ describe("series", () => { ${"getIndex"} | ${pl.Series(["a", "b", "c"]).getIndex(0)} | ${"a"} ${"hasValidity"} | ${pl.Series([1, null, 2]).hasValidity()} | ${true} ${"hasValidity"} | ${pl.Series([1, 1, 2]).hasValidity()} | ${false} - ${"hash"} | ${pl.Series([1]).hash()} | ${pl.Series([6340063056640878722n])} + ${"hash"} | ${pl.Series([1]).hash()} | ${pl.Series([7355865757046787768n])} ${"head"} | ${pl.Series([1, 2, 3, 4, 5, 5, 5]).head()} | ${pl.Series([1, 2, 3, 4, 5])} ${"head"} | ${pl.Series([1, 2, 3, 4, 5, 5, 5]).head(2)} | ${pl.Series([1, 2])} ${"interpolate"} | ${pl.Series([1, 2, null, null, 5]).interpolate()} | ${pl.Series([1, 2, 3, 4, 5])} @@ -520,8 +520,8 @@ describe("series", () => { ${"slice"} | ${pl.Series([1, 2, 3, 3, 0]).slice(-3, 3)} | ${pl.Series([3, 3, 0])} ${"slice"} | ${pl.Series([1, 2, 3, 3, 0]).slice(1, 3)} | ${pl.Series([2, 3, 3])} ${"sort"} | ${pl.Series([4, 2, 5, 1, 2, 3, 3, 0]).sort()} | ${pl.Series([0, 1, 2, 2, 3, 3, 4, 5])} - ${"sort"} | ${pl.Series([4, 2, 5, 0]).sort({ reverse: true })} | ${pl.Series([5, 4, 2, 0])} - ${"sort"} | ${pl.Series([4, 2, 5, 0]).sort({ reverse: false })} | ${pl.Series([0, 2, 4, 5])} + ${"sort"} | ${pl.Series([4, 2, 5, 0]).sort({ descending: true })} | ${pl.Series([5, 4, 2, 0])} + ${"sort"} | ${pl.Series([4, 2, 5, 0]).sort({ descending: false })} | ${pl.Series([0, 2, 4, 5])} ${"sum"} | ${pl.Series([1, 2, 2, 1]).sum()} | ${6} ${"tail"} | ${pl.Series([1, 2, 2, 1]).tail(2)} | ${pl.Series([2, 1])} ${"gatherEvery"} | ${pl.Series([1, 3, 2, 9, 1]).gatherEvery(2)} | ${pl.Series([1, 2, 1])} @@ -535,7 +535,7 @@ describe("series", () => { .gather([2])} | ${pl.Series([[6, 7, 8]])} ${"toArray"} | ${pl.Series([1, 2, 3]).toArray()} | ${[1, 2, 3]} ${"unique"} | ${pl.Series([1, 2, 3, 3]).unique().sort()} | ${pl.Series([1, 2, 3])} - ${"cumCount"} | ${pl.Series([1, 2, 3, 3]).cumCount()} | ${pl.Series([0, 1, 2, 3])} + ${"cumCount"} | ${pl.Series([1, 2, 3, 3]).cumCount()} | ${pl.Series([1, 2, 3, 4])} ${"shiftAndFill"} | ${pl.Series("foo", [1, 2, 3]).shiftAndFill(1, 99)} | ${pl.Series("foo", [99, 1, 2])} ${"bitand"} | ${pl .Series("bit", [1, 2, 3], pl.Int32) diff --git a/polars/lazy/expr/index.ts b/polars/lazy/expr/index.ts index 86c88af28..c13008642 100644 --- a/polars/lazy/expr/index.ts +++ b/polars/lazy/expr/index.ts @@ -9,7 +9,12 @@ export type { ExprStruct as StructNamespace } from "./struct"; import { DataType } from "../../datatypes"; import pli from "../../internals/polars_internal"; -import { ExprOrString, selectionToExprList, INSPECT_SYMBOL } from "../../utils"; +import { + ExprOrString, + selectionToExprList, + INSPECT_SYMBOL, + regexToString, +} from "../../utils"; import { Series } from "../../series"; import { Arithmetic, @@ -23,6 +28,7 @@ import { EwmOps, } from "../../shared_traits"; import { InterpolationMethod, FillNullStrategy, RankMethod } from "../../types"; +import { isRegExp } from "util/types"; /** * Expressions that can be used in various contexts. */ @@ -1246,6 +1252,9 @@ export const Expr: ExprConstructor = Object.assign(_Expr, { }); export const exprToLitOrExpr = (expr: any, stringToLit = true): Expr => { + if (isRegExp(expr)) { + return _Expr(pli.lit(regexToString(expr))); + } if (typeof expr === "string" && !stringToLit) { return _Expr(pli.col(expr)); } diff --git a/polars/lazy/expr/list.ts b/polars/lazy/expr/list.ts index 330009333..c1fb18fbe 100644 --- a/polars/lazy/expr/list.ts +++ b/polars/lazy/expr/list.ts @@ -68,8 +68,19 @@ export const ExprListFunctions = (_expr: any): ExprList => { first() { return this.get(0); }, - join(separator = ",") { - return wrap("listJoin", exprToLitOrExpr(separator)._expr); + join(options?) { + if (typeof options === "string") { + options = { separator: options }; + } + options = options ?? {}; + let separator = options?.separator ?? ","; + const ignoreNulls = options?.ignoreNulls ?? false; + + if (!Expr.isExpr(separator)) { + separator = pli.lit(separator); + } + + return wrap("listJoin", separator, ignoreNulls); }, last() { return this.get(-1); diff --git a/polars/lazy/expr/string.ts b/polars/lazy/expr/string.ts index 7546619d6..2232ff816 100644 --- a/polars/lazy/expr/string.ts +++ b/polars/lazy/expr/string.ts @@ -2,6 +2,7 @@ import { StringFunctions } from "../../shared_traits"; import { DataType } from "../../datatypes"; import { regexToString } from "../../utils"; import { Expr, _Expr, exprToLitOrExpr } from "../expr"; +import { lit } from "../functions"; /** * namespace containing expr string functions @@ -106,7 +107,7 @@ export interface StringNamespace extends StringFunctions { * └─────────┘ * ``` */ - extract(pat: string | RegExp, groupIndex: number): Expr; + extract(pat: any, groupIndex: number): Expr; /** * Parse string values as JSON. * Throw errors if encounter invalid JSON strings. @@ -267,7 +268,7 @@ export interface StringNamespace extends StringFunctions { * └──────────┘ * ``` */ - zFill(length: number): Expr; + zFill(length: number | Expr): Expr; /** * Add a trailing fillChar to a string until string length is reached. * If string is longer or equal to given length no modifications will be done @@ -306,7 +307,7 @@ export interface StringNamespace extends StringFunctions { * @param start - Start of the slice (negative indexing may be used). * @param length - Optional length of the slice. */ - slice(start: number, length?: number): Expr; + slice(start: number | Expr, length?: number | Expr): Expr; /** * Split a string into substrings using the specified separator and return them as a Series. * @param separator — A string that identifies character or characters to use in separating the string. @@ -364,8 +365,8 @@ export const ExprStringFunctions = (_expr: any): StringNamespace => { throw new RangeError("supported encodings are 'hex' and 'base64'"); } }, - extract(pat: string | RegExp, groupIndex: number) { - return wrap("strExtract", regexToString(pat), groupIndex); + extract(pat: any, groupIndex: number) { + return wrap("strExtract", exprToLitOrExpr(pat, true)._expr, groupIndex); }, jsonExtract(dtype?: DataType, inferSchemaLength?: number) { return wrap("strJsonDecode", dtype, inferSchemaLength); @@ -394,13 +395,23 @@ export const ExprStringFunctions = (_expr: any): StringNamespace => { padStart(length: number, fillChar: string) { return wrap("strPadStart", length, fillChar); }, - zFill(length: number) { - return wrap("strZFill", length); + zFill(length: number | Expr) { + if (!Expr.isExpr(length)) { + length = lit(length)._expr; + } + return wrap("zfill", length); }, padEnd(length: number, fillChar: string) { return wrap("strPadEnd", length, fillChar); }, - slice(start: number, length?: number) { + slice(start, length?) { + if (!Expr.isExpr(start)) { + start = lit(start)._expr; + } + if (!Expr.isExpr(length)) { + length = lit(length)._expr; + } + return wrap("strSlice", start, length); }, split(by: string, options?) { diff --git a/polars/lazy/functions.ts b/polars/lazy/functions.ts index 4c883b057..f2ef87ba3 100644 --- a/polars/lazy/functions.ts +++ b/polars/lazy/functions.ts @@ -305,15 +305,23 @@ export function concatList(...exprs): Expr { } /** Concat Utf8 Series in linear time. Non utf8 columns are cast to utf8. */ -export function concatString(opts: { exprs: ExprOrString[]; sep: string }); -export function concatString(exprs: ExprOrString[], sep?: string); -export function concatString(opts, sep = ",") { +export function concatString(opts: { + exprs: ExprOrString[]; + sep: string; + ignoreNulls?: boolean; +}); +export function concatString( + exprs: ExprOrString[], + sep?: string, + ignoreNulls?: boolean, +); +export function concatString(opts, sep = ",", ignoreNulls = true) { if (opts?.exprs) { - return concatString(opts.exprs, opts.sep); + return concatString(opts.exprs, opts.sep, opts.ignoreNulls); } const items = selectionToExprList(opts as any, false); - return (Expr as any)(pli.concatStr(items, sep)); + return (Expr as any)(pli.concatStr(items, sep, ignoreNulls)); } /** Count the number of values in this column. */ diff --git a/polars/series/index.ts b/polars/series/index.ts index 1c6489ee9..e2ec1dd35 100644 --- a/polars/series/index.ts +++ b/polars/series/index.ts @@ -865,7 +865,8 @@ export interface Series slice(start: number, length?: number): Series; /** * __Sort this Series.__ - * @param reverse - Reverse sort + * @param descending - Sort in descending order. + * @param nullsLast - Place nulls at the end. * @example * ``` * s = pl.Series("a", [1, 3, 4, 2]) @@ -878,7 +879,7 @@ export interface Series * 3 * 4 * ] - * s.sort(true) + * s.sort({descending: true}) * shape: (4,) * Series: 'a' [i64] * [ @@ -890,8 +891,7 @@ export interface Series * ``` */ sort(): Series; - sort(reverse?: boolean): Series; - sort(options: { reverse: boolean }): Series; + sort(options: { descending?: boolean; nullsLast?: boolean }): Series; /** * Reduce this Series to the sum value. * @example @@ -1684,12 +1684,10 @@ export function _Series(_s: any): Series { return wrap("slice", offset.offset, offset.length); }, - sort(reverse?) { - if (typeof reverse === "boolean") { - return wrap("sort", reverse); - } + sort(options?) { + options = { descending: false, nullsLast: false, ...(options ?? {}) }; - return wrap("sort", reverse?.reverse ?? false); + return wrap("sort", options.descending, options.nullsLast); }, sub(field) { return dtypeWrap("Sub", field); diff --git a/polars/series/list.ts b/polars/series/list.ts index d9fcb44f3..077415e3c 100644 --- a/polars/series/list.ts +++ b/polars/series/list.ts @@ -1,4 +1,5 @@ import { Series, _Series } from "."; +import { exprToLitOrExpr } from ".."; import { col } from "../lazy/functions"; import { ListFunctions } from "../shared_traits"; @@ -49,8 +50,8 @@ export const SeriesListFunctions = (_s): ListFunctions => { tail(n = 5) { return this.slice(-n, n); }, - join(separator = ",") { - return wrap("join", separator); + join(options?) { + return wrap("join", options); }, last() { return wrap("get", -1); diff --git a/polars/series/string.ts b/polars/series/string.ts index d7c9612b6..21ad17b3b 100644 --- a/polars/series/string.ts +++ b/polars/series/string.ts @@ -1,3 +1,4 @@ +import { Expr } from "./../lazy/expr/index"; import { DataType } from "../datatypes"; import { _Series, Series } from "."; import { regexToString } from "../utils"; @@ -92,7 +93,7 @@ export interface StringNamespace extends StringFunctions { * └─────────┘ * ``` */ - extract(pattern: string | RegExp, groupIndex: number): Series; + extract(pattern: any, groupIndex: number): Series; /*** * Parse string values as JSON. * @returns Utf8 array. Contain null if original value is null or the `jsonPath` return nothing. @@ -222,7 +223,7 @@ export interface StringNamespace extends StringFunctions { * └──────────┘ * ``` */ - zFill(length: number): Series; + zFill(length: number | Expr): Series; /** Add trailing zeros */ padEnd(length: number, fillChar: string): Series; /** @@ -251,7 +252,7 @@ export interface StringNamespace extends StringFunctions { * @param start - Start of the slice (negative indexing may be used). * @param length - Optional length of the slice. */ - slice(start: number, length?: number): Series; + slice(start: number | Expr, length?: number | Expr): Series; /** * Split a string into substrings using the specified separator. * The return type will by of type List @@ -313,8 +314,12 @@ export const SeriesStringFunctions = (_s: any): StringNamespace => { throw new RangeError("supported encodings are 'hex' and 'base64'"); } }, - extract(pat: string | RegExp, groupIndex: number) { - return wrap("strExtract", regexToString(pat), groupIndex); + extract(pat: any, groupIndex: number) { + const s = _Series(_s); + return s + .toFrame() + .select(col(s.name).str.extract(pat, groupIndex).as(s.name)) + .getColumn(s.name); }, jsonExtract(dtype?: DataType, inferSchemaLength?: number) { return wrap("strJsonDecode", dtype, inferSchemaLength); @@ -334,8 +339,11 @@ export const SeriesStringFunctions = (_s: any): StringNamespace => { padStart(length: number, fillChar: string) { return wrap("strPadStart", length, fillChar); }, - zFill(length: number) { - return wrap("strZFill", length); + zFill(length) { + return _Series(_s) + .toFrame() + .select(col(_s.name).str.zFill(length).as(_s.name)) + .getColumn(_s.name); }, padEnd(length: number, fillChar: string) { return wrap("strPadEnd", length, fillChar); @@ -349,8 +357,13 @@ export const SeriesStringFunctions = (_s: any): StringNamespace => { rstrip() { return wrap("strReplace", /[ \t]+$/.source, ""); }, - slice(start: number, length?: number) { - return wrap("strSlice", start, length); + slice(start, length?) { + const s = _Series(_s); + + return s + .toFrame() + .select(col(s.name).str.slice(start, length).as(s.name)) + .getColumn(s.name); }, split(by: string, options?) { const inclusive = diff --git a/polars/shared_traits.ts b/polars/shared_traits.ts index a2540ec8b..b805ef4ad 100644 --- a/polars/shared_traits.ts +++ b/polars/shared_traits.ts @@ -663,9 +663,12 @@ export interface ListFunctions { * This errors if inner type of list `!= Utf8`. * @param separator A string used to separate one element of the list from the next in the resulting string. * If omitted, the list elements are separated with a comma. + * @param ignoreNulls - If true, null values will be ignored. * @category List */ - join(separator?: string): T; + join(): T; + join(separator: string | Expr): T; + join(options: { separator?: string | Expr; ignoreNulls?: boolean }): T; /** * Get the last value of the sublists. * @category List diff --git a/src/conversion.rs b/src/conversion.rs index 398a5500b..db34f0721 100644 --- a/src/conversion.rs +++ b/src/conversion.rs @@ -5,13 +5,13 @@ use napi::{ JsBigInt, JsBoolean, JsDate, JsNumber, JsObject, JsString, JsUnknown, Result, ValueType, }; use polars::frame::NullStrategy; -use polars::io::RowCount; use polars::prelude::Expr; use polars::prelude::*; use polars_core::prelude::FillNullStrategy; use polars_core::prelude::{Field, Schema}; use polars_core::series::ops::NullBehavior; use polars_io::parquet::ParallelStrategy; +use polars_io::RowIndex; use std::any::Any; use std::collections::HashMap; @@ -166,7 +166,7 @@ impl FromNapiValue for Wrap { unsafe fn from_napi_value(env: sys::napi_env, napi_val: sys::napi_value) -> JsResult { let arr = Array::from_napi_value(env, napi_val)?; let len = arr.len() as usize; - let mut builder = StringChunkedBuilder::new("", len, len * 25); + let mut builder = StringChunkedBuilder::new("", len); for i in 0..len { match arr.get::(i as u32) { Ok(val) => match val { @@ -553,9 +553,9 @@ pub struct JsRowCount { pub offset: u32, } -impl From for RowCount { +impl From for RowIndex { fn from(o: JsRowCount) -> Self { - RowCount { + RowIndex { name: o.name, offset: o.offset, } diff --git a/src/dataframe.rs b/src/dataframe.rs index a47182f88..11d033d0e 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -4,11 +4,13 @@ use crate::series::JsSeries; use napi::JsUnknown; use polars::frame::row::{infer_schema, Row}; use polars::frame::NullStrategy; -use polars::io::RowCount; +use polars_io::RowIndex; + use std::borrow::Borrow; use std::collections::HashMap; use std::fs::File; use std::io::{BufReader, BufWriter, Cursor}; +use std::num::NonZeroUsize; use std::sync::Arc; #[napi] @@ -91,7 +93,7 @@ pub fn read_csv( options: ReadCsvOptions, ) -> napi::Result { let null_values = options.null_values.map(|w| w.0); - let row_count = options.row_count.map(RowCount::from); + let row_count = options.row_count.map(RowIndex::from); let projection = options .projection .map(|p: Vec| p.into_iter().map(|p| p as usize).collect()); @@ -144,7 +146,7 @@ pub fn read_csv( .with_null_values(null_values) .with_try_parse_dates(options.try_parse_dates) .with_quote_char(quote_char) - .with_row_count(row_count) + .with_row_index(row_count) .sample_size(options.sample_size as usize) .with_skip_rows_after_header(options.skip_rows_after_header as usize) .raise_if_empty(options.raise_if_empty) @@ -175,7 +177,7 @@ pub fn read_csv( .with_null_values(null_values) .with_try_parse_dates(options.try_parse_dates) .with_quote_char(quote_char) - .with_row_count(row_count) + .with_row_index(row_count) .sample_size(options.sample_size as usize) .with_skip_rows_after_header(options.skip_rows_after_header as usize) .raise_if_empty(options.raise_if_empty) @@ -206,7 +208,9 @@ pub fn read_json_lines( options: ReadJsonOptions, ) -> napi::Result { let infer_schema_length = options.infer_schema_length.unwrap_or(100) as usize; - let batch_size = options.batch_size.map(|b| b as usize); + let batch_size = options + .batch_size + .map(|b| NonZeroUsize::try_from(b as usize).unwrap()); let df = match path_or_buffer { Either::A(path) => JsonLineReader::from_path(path) @@ -233,6 +237,7 @@ pub fn read_json( ) -> napi::Result { let infer_schema_length = options.infer_schema_length.unwrap_or(100) as usize; let batch_size = options.batch_size.unwrap_or(10000) as usize; + let batch_size = NonZeroUsize::new(batch_size).unwrap(); let format: JsonFormat = options .format .map(|s| match s.as_ref() { @@ -298,7 +303,7 @@ pub fn read_parquet( .with_columns(columns) .read_parallel(parallel.0) .with_n_rows(n_rows) - .with_row_count(row_count) + .with_row_index(row_count) .finish() } Either::B(buf) => { @@ -308,7 +313,7 @@ pub fn read_parquet( .with_columns(columns) .read_parallel(parallel.0) .with_n_rows(n_rows) - .with_row_count(row_count) + .with_row_index(row_count) .finish() } }; @@ -344,7 +349,7 @@ pub fn read_ipc( .with_projection(projection) .with_columns(columns) .with_n_rows(n_rows) - .with_row_count(row_count) + .with_row_index(row_count) .finish() } Either::B(buf) => { @@ -353,7 +358,7 @@ pub fn read_ipc( .with_projection(projection) .with_columns(columns) .with_n_rows(n_rows) - .with_row_count(row_count) + .with_row_index(row_count) .finish() } }; @@ -870,7 +875,7 @@ impl JsDataFrame { pub fn with_row_count(&self, name: String, offset: Option) -> napi::Result { let df = self .df - .with_row_count(&name, offset) + .with_row_index(&name, offset) .map_err(JsPolarsErr::from)?; Ok(df.into()) } @@ -906,9 +911,9 @@ impl JsDataFrame { }; fun( &self.df, - values, index, columns, + Some(values), sort_columns, aggregate_expr.map(|e| e.0 as Expr), separator, @@ -1053,8 +1058,8 @@ impl JsDataFrame { } #[napi(catch_unwind)] - pub fn transpose( - &self, + pub unsafe fn transpose( + &mut self, keep_names_as: Option, names: Option>>, ) -> napi::Result { diff --git a/src/lazy/dataframe.rs b/src/lazy/dataframe.rs index 2eca2f79a..429fd0947 100644 --- a/src/lazy/dataframe.rs +++ b/src/lazy/dataframe.rs @@ -2,12 +2,13 @@ use super::dsl::*; use crate::dataframe::JsDataFrame; use crate::prelude::*; use napi::{Env, Task}; -use polars::io::RowCount; use polars::lazy::frame::{LazyCsvReader, LazyFrame, LazyGroupBy}; use polars::prelude::{col, lit, ClosedWindow, CsvEncoding, DataFrame, Field, JoinType, Schema}; use polars_io::cloud::CloudOptions; use polars_io::parquet::ParallelStrategy; +use polars_io::RowIndex; use std::collections::HashMap; +use std::num::NonZeroUsize; use std::path::PathBuf; #[napi] @@ -517,13 +518,13 @@ impl JsLazyFrame { #[napi(catch_unwind)] pub fn with_row_count(&self, name: String, offset: Option) -> JsLazyFrame { let ldf = self.ldf.clone(); - ldf.with_row_count(&name, offset).into() + ldf.with_row_index(&name, offset).into() } #[napi(catch_unwind)] pub fn drop_columns(&self, colss: Vec) -> JsLazyFrame { let ldf = self.ldf.clone(); - ldf.drop_columns(colss).into() + ldf.drop(colss).into() } #[napi(js_name = "clone", catch_unwind)] pub fn clone(&self) -> JsLazyFrame { @@ -573,6 +574,7 @@ impl JsLazyFrame { }; let batch_size = options.batch_size.map(|bs| bs).unwrap_or(1024) as usize; + let batch_size = NonZeroUsize::new(batch_size).unwrap(); let include_bom = options.include_bom.unwrap_or(false); let include_header = options.include_header.unwrap_or(true); let maintain_order = options.maintain_order; @@ -610,7 +612,9 @@ impl JsLazyFrame { let path_buf: PathBuf = PathBuf::from(path); let ldf = self.ldf.clone().with_comm_subplan_elim(false); - let _ = ldf.sink_parquet(path_buf, options).map_err(JsPolarsErr::from); + let _ = ldf + .sink_parquet(path_buf, options) + .map_err(JsPolarsErr::from); Ok(()) } } @@ -645,7 +649,7 @@ pub struct ScanCsvOptions { #[napi(catch_unwind)] pub fn scan_csv(path: String, options: ScanCsvOptions) -> napi::Result { let n_rows = options.n_rows.map(|i| i as usize); - let row_count = options.row_count.map(RowCount::from); + let row_count = options.row_count.map(RowIndex::from); let missing_utf8_is_empty_string: bool = options.missing_utf8_is_empty_string.unwrap_or(false); let quote_char = if let Some(s) = options.quote_char { if s.is_empty() { @@ -690,7 +694,7 @@ pub fn scan_csv(path: String, options: ScanCsvOptions) -> napi::Result napi::Result = options.row_count.map(|rc| rc.into()); + let row_index: Option = options.row_count.map(|rc| rc.into()); let rechunk = options.rechunk.unwrap_or(false); let low_memory = options.low_memory.unwrap_or(false); let use_statistics = options.use_statistics.unwrap_or(false); @@ -731,7 +735,7 @@ pub fn scan_parquet(path: String, options: ScanParquetOptions) -> napi::Result napi::Result = options.row_count.map(|rc| rc.into()); + let row_index: Option = options.row_count.map(|rc| rc.into()); let args = ScanArgsIpc { n_rows, cache, rechunk, - row_count, + row_index, memmap, }; let lf = LazyFrame::scan_ipc(path, args).map_err(JsPolarsErr::from)?; @@ -781,10 +785,12 @@ pub struct JsonScanOptions { #[napi(catch_unwind)] pub fn scan_json(path: String, options: JsonScanOptions) -> napi::Result { + let batch_size = options.batch_size as usize; + let batch_size = NonZeroUsize::new(batch_size); LazyJsonLineReader::new(path) - .with_batch_size(Some(options.batch_size as usize)) + .with_batch_size(batch_size) .low_memory(options.low_memory.unwrap_or(false)) - .with_row_count(options.row_count.map(|rc| rc.into())) + .with_row_index(options.row_count.map(|rc| rc.into())) .with_n_rows(options.num_rows.map(|i| i as usize)) .finish() .map_err(|err| napi::Error::from_reason(format!("{:?}", err))) diff --git a/src/lazy/dsl.rs b/src/lazy/dsl.rs index 4498faea1..a0fad40f6 100644 --- a/src/lazy/dsl.rs +++ b/src/lazy/dsl.rs @@ -686,18 +686,10 @@ impl JsExpr { } #[napi(catch_unwind)] - pub fn str_z_fill(&self, width: i64) -> JsExpr { - let function = move |s: Series| { - let ca = s.str()?; - Ok(Some(ca.zfill(width as usize).into_series())) - }; - - self.clone() - .inner - .map(function, GetOutput::from_type(DataType::String)) - .with_fmt("str.z_fill") - .into() + pub fn zfill(&self, length: &JsExpr) -> Self { + self.inner.clone().str().zfill(length.inner.clone()).into() } + #[napi(catch_unwind)] pub fn str_to_uppercase(&self) -> JsExpr { let function = |s: Series| { @@ -710,17 +702,13 @@ impl JsExpr { .with_fmt("str.to_uppercase") .into() } + #[napi(catch_unwind)] - pub fn str_slice(&self, start: i64, length: Option) -> JsExpr { - let function = move |s: Series| { - let length = length.map(|l| l as u64); - let ca = s.str()?; - Ok(Some(ca.str_slice(start, length).into_series())) - }; - self.clone() - .inner - .map(function, GetOutput::from_type(DataType::String)) - .with_fmt("str.slice") + pub fn str_slice(&self, offset: &JsExpr, length: &JsExpr) -> JsExpr { + self.inner + .clone() + .str() + .slice(offset.inner.clone(), length.inner.clone()) .into() } @@ -876,11 +864,11 @@ impl JsExpr { .into() } #[napi(catch_unwind)] - pub fn str_extract(&self, pat: String, group_index: i64) -> JsExpr { + pub fn str_extract(&self, pat: &JsExpr, group_index: i64) -> JsExpr { self.inner .clone() .str() - .extract(&pat, group_index as usize) + .extract(pat.inner.clone(), group_index as usize) .into() } #[napi(catch_unwind)] @@ -1221,11 +1209,11 @@ impl JsExpr { self.inner.clone().list().get(index.inner.clone()).into() } #[napi(catch_unwind)] - pub fn list_join(&self, separator: &JsExpr) -> JsExpr { + pub fn list_join(&self, separator: &JsExpr, ignore_nulls: bool) -> JsExpr { self.inner .clone() .list() - .join(separator.inner.clone()) + .join(separator.inner.clone(), ignore_nulls) .into() } #[napi(catch_unwind)] @@ -1566,11 +1554,6 @@ pub fn col(name: String) -> JsExpr { dsl::col(&name).into() } -#[napi(catch_unwind)] -pub fn count() -> JsExpr { - dsl::count().into() -} - #[napi(catch_unwind)] pub fn first() -> JsExpr { dsl::first().into() @@ -1665,9 +1648,9 @@ pub fn concat_lst(s: Vec<&JsExpr>) -> JsResult { } #[napi(catch_unwind)] -pub fn concat_str(s: Vec<&JsExpr>, sep: String) -> JsExpr { - let s = s.to_exprs(); - dsl::concat_str(s, &sep).into() +pub fn concat_str(s: Vec<&JsExpr>, separator: String, ignore_nulls: bool) -> JsExpr { + let s = s.into_iter().map(|e| e.inner.clone()).collect::>(); + dsl::concat_str(s, &separator, ignore_nulls).into() } #[napi(catch_unwind)] diff --git a/src/series.rs b/src/series.rs index 8f7bf8857..35c7bfc7b 100644 --- a/src/series.rs +++ b/src/series.rs @@ -488,11 +488,12 @@ impl JsSeries { pub fn tail(&self, length: Option) -> JsSeries { (self.series.tail(length.map(|l| l as usize))).into() } + #[napi(catch_unwind)] - pub fn sort(&self, reverse: Option) -> JsSeries { - let reverse = reverse.unwrap_or(false); - self.series.sort(reverse).into() + pub unsafe fn sort(&mut self, descending: bool, nulls_last: bool) -> Self { + self.series.sort(descending, nulls_last).into() } + #[napi] pub fn argsort( &self, @@ -886,16 +887,6 @@ impl JsSeries { Ok(s.into()) } - #[napi(catch_unwind)] - pub fn str_extract(&self, pat: String, group_index: i64) -> napi::Result { - let ca = self.series.str().map_err(JsPolarsErr::from)?; - let s = ca - .extract(&pat, group_index as usize) - .map_err(JsPolarsErr::from)? - .into_series(); - Ok(s.into()) - } - #[napi(catch_unwind)] pub fn str_replace(&self, pat: String, val: String) -> napi::Result { let ca = self.series.str().map_err(JsPolarsErr::from)?; @@ -930,13 +921,6 @@ impl JsSeries { Ok(s.into()) } - #[napi(catch_unwind)] - pub fn str_slice(&self, start: i64, length: Option) -> napi::Result { - let ca = self.series.str().map_err(JsPolarsErr::from)?; - let s = ca.str_slice(start, length.map(|l| l as u64)).into_series(); - Ok(s.into()) - } - #[napi(catch_unwind)] pub fn str_hex_encode(&self) -> napi::Result { let ca = self.series.str().map_err(JsPolarsErr::from)?; @@ -983,12 +967,7 @@ impl JsSeries { .into_series(); Ok(s.into()) } - #[napi(catch_unwind)] - pub fn str_z_fill(&self, length: i64) -> napi::Result { - let ca = self.series.str().map_err(JsPolarsErr::from)?; - let s = ca.zfill(length as usize).into_series(); - Ok(s.into()) - } + #[napi(catch_unwind)] pub fn strftime(&self, fmt: String) -> napi::Result { let s = self.series.strftime(&fmt).map_err(JsPolarsErr::from)?; @@ -1011,6 +990,7 @@ impl JsSeries { // let df = self.series.to_dummies().map_err(JsPolarsErr::from)?; // Ok(df.into()) } + #[napi(catch_unwind)] pub fn get_list(&self, index: i64) -> Option { if let Ok(ca) = &self.series.list() {