Skip to content

Commit

Permalink
chore: Add criterion benchmarks for casting between integer types (#401)
Browse files Browse the repository at this point in the history
* Add cargo bench for casting between int types

* Update core/benches/cast_from_string.rs

Co-authored-by: comphead <[email protected]>

---------

Co-authored-by: comphead <[email protected]>
  • Loading branch information
andygrove and comphead authored May 9, 2024
1 parent 1403380 commit bc6b2cd
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 15 deletions.
6 changes: 5 additions & 1 deletion core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -119,5 +119,9 @@ name = "row_columnar"
harness = false

[[bench]]
name = "cast"
name = "cast_from_string"
harness = false

[[bench]]
name = "cast_numeric"
harness = false
34 changes: 20 additions & 14 deletions core/benches/cast.rs → core/benches/cast_from_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,7 @@ use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
use std::sync::Arc;

fn criterion_benchmark(c: &mut Criterion) {
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
let mut b = StringBuilder::new();
for i in 0..1000 {
if i % 10 == 0 {
b.append_null();
} else if i % 2 == 0 {
b.append_value(format!("{}", rand::random::<f64>()));
} else {
b.append_value(format!("{}", rand::random::<i64>()));
}
}
let array = b.finish();
let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
let batch = create_utf8_batch();
let expr = Arc::new(Column::new("a", 0));
let timezone = "".to_string();
let cast_string_to_i8 = Cast::new(
Expand All @@ -58,7 +46,7 @@ fn criterion_benchmark(c: &mut Criterion) {
);
let cast_string_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);

let mut group = c.benchmark_group("cast");
let mut group = c.benchmark_group("cast_string_to_int");
group.bench_function("cast_string_to_i8", |b| {
b.iter(|| cast_string_to_i8.evaluate(&batch).unwrap());
});
Expand All @@ -73,6 +61,24 @@ fn criterion_benchmark(c: &mut Criterion) {
});
}

/// Builds a single-column, nullable UTF-8 [`RecordBatch`] of 1000 rows used as input
/// for the string-to-integer cast benchmarks.
///
/// Row contents depend on the row index `i`:
/// * rows where `i % 10 == 0` are null,
/// * the remaining even rows hold a random `f64` rendered as text,
/// * odd rows hold a random `i64` rendered as text.
///
/// # Panics
///
/// Panics if the batch cannot be assembled from the schema and the generated column.
fn create_utf8_batch() -> RecordBatch {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
    let mut b = StringBuilder::new();
    for i in 0..1000 {
        if i % 10 == 0 {
            b.append_null();
        } else if i % 2 == 0 {
            b.append_value(rand::random::<f64>().to_string());
        } else {
            b.append_value(rand::random::<i64>().to_string());
        }
    }
    let array = b.finish();
    // `schema` is not used again, so it is moved instead of cloned, and the batch is
    // returned directly rather than through a temporary binding.
    RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap()
}

/// Returns the Criterion configuration for this benchmark: the library defaults,
/// with no overrides applied here.
fn config() -> Criterion {
Criterion::default()
}
Expand Down
79 changes: 79 additions & 0 deletions core/benches/cast_numeric.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow_array::{builder::Int32Builder, RecordBatch};
use arrow_schema::{DataType, Field, Schema};
use comet::execution::datafusion::expressions::cast::{Cast, EvalMode};
use criterion::{criterion_group, criterion_main, Criterion};
use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
use std::sync::Arc;

/// Benchmarks casting a nullable `Int32` column to `Int8`, `Int16` and `Int64`.
///
/// All three casts are built with `EvalMode::Legacy` and an empty timezone string and
/// read column `"a"` (index 0) of the batch produced by `create_int32_batch`. The batch
/// and the cast expressions are created once, outside the measured closures, so each
/// iteration times only `evaluate`.
///
/// # Panics
///
/// Panics if evaluating a cast against the batch returns an error.
fn criterion_benchmark(c: &mut Criterion) {
    let batch = create_int32_batch();
    let expr = Arc::new(Column::new("a", 0));
    let timezone = "".to_string();
    let cast_i32_to_i8 = Cast::new(
        expr.clone(),
        DataType::Int8,
        EvalMode::Legacy,
        timezone.clone(),
    );
    let cast_i32_to_i16 = Cast::new(
        expr.clone(),
        DataType::Int16,
        EvalMode::Legacy,
        timezone.clone(),
    );
    // The last cast takes ownership of `expr` and `timezone`; no further clones needed.
    let cast_i32_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);

    let mut group = c.benchmark_group("cast_int_to_int");
    group.bench_function("cast_i32_to_i8", |b| {
        b.iter(|| cast_i32_to_i8.evaluate(&batch).unwrap());
    });
    group.bench_function("cast_i32_to_i16", |b| {
        b.iter(|| cast_i32_to_i16.evaluate(&batch).unwrap());
    });
    group.bench_function("cast_i32_to_i64", |b| {
        b.iter(|| cast_i32_to_i64.evaluate(&batch).unwrap());
    });
    // Finish the group explicitly, as Criterion recommends, instead of relying on the
    // implicit finish when the group is dropped.
    group.finish();
}

/// Builds a single-column, nullable `Int32` [`RecordBatch`] of 1000 rows used as input
/// for the integer-to-integer cast benchmarks.
///
/// Rows where `i % 10 == 0` are null; every other row holds a random `i32`.
///
/// # Panics
///
/// Panics if the batch cannot be assembled from the schema and the generated column.
fn create_int32_batch() -> RecordBatch {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
    let mut b = Int32Builder::new();
    for i in 0..1000 {
        if i % 10 == 0 {
            b.append_null();
        } else {
            b.append_value(rand::random::<i32>());
        }
    }
    let array = b.finish();
    // `schema` is not used again, so it is moved instead of cloned, and the batch is
    // returned directly rather than through a temporary binding.
    RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap()
}

/// Returns the Criterion configuration for this benchmark: the library defaults,
/// with no overrides applied here.
fn config() -> Criterion {
Criterion::default()
}

// Register `criterion_benchmark` as the only target of a group named `benches`,
// configured by `config()`, then hand that group to `criterion_main!`.
criterion_group! {
name = benches;
config = config();
targets = criterion_benchmark
}
criterion_main!(benches);

0 comments on commit bc6b2cd

Please sign in to comment.