Skip to content

Commit

Permalink
Merge pull request #14 from CeleritasCelery/benchmarking
Browse files — browse the repository at this point in the history
Add line breaks, large benchmark, and throughput measures to benchmarks
  • Loading branch information
cessen authored Feb 6, 2023
2 parents 5b99a71 + a00ab3d commit b4440c3
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 45 deletions.
175 changes: 130 additions & 45 deletions benches/all.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#![allow(clippy::uninlined_format_args)]
use std::fs;

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use str_indices::{chars, lines, lines_crlf, lines_lf, utf16};

fn all(c: &mut Criterion) {
// Load benchmark strings.
let test_strings: Vec<(&str, String)> = vec![
let test_strings = vec![
("en_0001", "E".into()),
(
"en_0010",
Expand All @@ -20,6 +20,12 @@ fn all(c: &mut Criterion) {
"en_1000",
fs::read_to_string("benches/text/en_1000.txt").expect("Cannot find benchmark text."),
),
(
"en_10000",
fs::read_to_string("benches/text/en_1000.txt")
.expect("Cannot find benchmark text.")
.repeat(10),
),
("jp_0003", "日".into()),
(
"jp_0102",
Expand All @@ -29,6 +35,32 @@ fn all(c: &mut Criterion) {
"jp_1001",
fs::read_to_string("benches/text/jp_1001.txt").expect("Cannot find benchmark text."),
),
(
"jp_10000",
fs::read_to_string("benches/text/jp_1001.txt")
.expect("Cannot find benchmark text.")
.repeat(10),
),
];

// Inputs for the line-oriented benchmarks, as (benchmark name, text) pairs.
// All three entries read the same file, benches/text/lines.txt; the second
// and third repeat its contents 10 and 100 times to produce larger inputs.
// A failed read panics with "Cannot find benchmark text.".
// NOTE(review): the names suggest 100 / 1000 / 10000 lines — confirm against
// the actual line count of lines.txt.
let line_strings = vec![
(
"lines_100",
fs::read_to_string("benches/text/lines.txt")
.expect("Cannot find benchmark text."),
),
(
"lines_1000",
fs::read_to_string("benches/text/lines.txt")
.expect("Cannot find benchmark text.")
.repeat(10),
),
(
"lines_10000",
fs::read_to_string("benches/text/lines.txt")
.expect("Cannot find benchmark text.")
.repeat(100),
),
];

//---------------------------------------------------------
Expand All @@ -38,6 +70,7 @@ fn all(c: &mut Criterion) {
{
let mut group = c.benchmark_group("chars::count");
for (text_name, text) in test_strings.iter() {
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(*text_name, |bench| {
bench.iter(|| {
black_box(chars::count(text));
Expand All @@ -50,6 +83,7 @@ fn all(c: &mut Criterion) {
// for performance comparisons.
let mut group = c.benchmark_group("chars::count_std");
for (text_name, text) in test_strings.iter() {
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(*text_name, |bench| {
bench.iter(|| {
black_box(text.chars().count());
Expand All @@ -62,6 +96,7 @@ fn all(c: &mut Criterion) {
{
let mut group = c.benchmark_group("chars::from_byte_idx");
for (text_name, text) in test_strings.iter() {
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(*text_name, |bench| {
let idx = text.len();
bench.iter(|| {
Expand All @@ -75,6 +110,7 @@ fn all(c: &mut Criterion) {
// for performance comparisons.
let mut group = c.benchmark_group("chars::from_byte_idx_std");
for (text_name, text) in test_strings.iter() {
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(format!("std::{}", text_name), |bench| {
let idx = text.len();
bench.iter(|| {
Expand All @@ -97,6 +133,7 @@ fn all(c: &mut Criterion) {
{
let mut group = c.benchmark_group("chars::to_byte_idx");
for (text_name, text) in test_strings.iter() {
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(*text_name, |bench| {
let idx = chars::count(text);
bench.iter(|| {
Expand All @@ -110,6 +147,7 @@ fn all(c: &mut Criterion) {
// for performance comparisons.
let mut group = c.benchmark_group("chars::to_byte_idx_std");
for (text_name, text) in test_strings.iter() {
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(format!("std::{}", text_name), |bench| {
let idx = chars::count(text) - 1; // Minus 1 so we can unwrap below.
bench.iter(|| {
Expand All @@ -126,6 +164,7 @@ fn all(c: &mut Criterion) {
{
let mut group = c.benchmark_group("utf16::count");
for (text_name, text) in test_strings.iter() {
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(*text_name, |bench| {
bench.iter(|| {
black_box(utf16::count(text));
Expand All @@ -138,6 +177,7 @@ fn all(c: &mut Criterion) {
{
let mut group = c.benchmark_group("utf16::count_surrogates");
for (text_name, text) in test_strings.iter() {
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(*text_name, |bench| {
bench.iter(|| {
black_box(utf16::count_surrogates(text));
Expand All @@ -150,6 +190,7 @@ fn all(c: &mut Criterion) {
{
let mut group = c.benchmark_group("utf16::from_byte_idx");
for (text_name, text) in test_strings.iter() {
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(*text_name, |bench| {
let idx = text.len();
bench.iter(|| {
Expand All @@ -163,6 +204,7 @@ fn all(c: &mut Criterion) {
{
let mut group = c.benchmark_group("utf16::to_byte_idx");
for (text_name, text) in test_strings.iter() {
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(*text_name, |bench| {
let idx = utf16::count(text);
bench.iter(|| {
Expand All @@ -174,42 +216,64 @@ fn all(c: &mut Criterion) {

//---------------------------------------------------------
// Lines (unicode).
// (label, break sequence) pairs used by the `lines::*` benchmark groups.
// Each group substitutes the sequence for every '\n' in the line texts and
// appends the label to the benchmark name ("{text_name}_{break_name}").
let unicode_line_breaks = [
("LF", "\u{000A}"), // line feed
("VT", "\u{000B}"), // vertical tab
("FF", "\u{000C}"), // form feed
("CR", "\u{000D}"), // carriage return
("NEL", "\u{0085}"), // next line
("LS", "\u{2028}"), // line separator
("PS", "\u{2029}"), // paragraph separator
("CRLF", "\u{000D}\u{000A}"), // carriage return followed by line feed
];

// lines::count_breaks()
{
let mut group = c.benchmark_group("lines::count_breaks");
for (text_name, text) in test_strings.iter() {
group.bench_function(*text_name, |bench| {
bench.iter(|| {
black_box(lines::count_breaks(text));
})
});
for (text_name, text) in line_strings.iter() {
for (break_name, line_break) in unicode_line_breaks {
let text = text.replace('\n', line_break);
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(format!("{text_name}_{break_name}"), |bench| {
bench.iter(|| {
black_box(lines::count_breaks(&text));
})
});
}
}
}

// lines::from_byte_idx()
{
let mut group = c.benchmark_group("lines::from_byte_idx");
for (text_name, text) in test_strings.iter() {
group.bench_function(*text_name, |bench| {
let idx = text.len();
bench.iter(|| {
black_box(lines::from_byte_idx(text, idx));
})
});
for (text_name, text) in line_strings.iter() {
for (break_name, line_break) in unicode_line_breaks {
let text = text.replace('\n', line_break);
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(format!("{text_name}_{break_name}"), |bench| {
let idx = text.len();
bench.iter(|| {
black_box(lines::from_byte_idx(&text, idx));
})
});
}
}
}

// lines::to_byte_idx()
{
let mut group = c.benchmark_group("lines::to_byte_idx");
for (text_name, text) in test_strings.iter() {
group.bench_function(*text_name, |bench| {
let idx = lines::count_breaks(text) + 1;
bench.iter(|| {
black_box(lines::to_byte_idx(text, idx));
})
});
for (text_name, text) in line_strings.iter() {
for (break_name, line_break) in unicode_line_breaks {
let text = &text.replace('\n', line_break);
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(format!("{text_name}_{break_name}"), |bench| {
let idx = lines::count_breaks(text) + 1;
bench.iter(|| {
black_box(lines::to_byte_idx(text, idx));
})
});
}
}
}

Expand All @@ -219,7 +283,8 @@ fn all(c: &mut Criterion) {
// lines_lf::count_breaks()
{
let mut group = c.benchmark_group("lines_lf::count_breaks");
for (text_name, text) in test_strings.iter() {
for (text_name, text) in line_strings.iter() {
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(*text_name, |bench| {
bench.iter(|| {
black_box(lines_lf::count_breaks(text));
Expand All @@ -235,7 +300,8 @@ fn all(c: &mut Criterion) {
// But it should be close enough for perf
// comparisons.
let mut group = c.benchmark_group("lines_lf::count_breaks_std");
for (text_name, text) in test_strings.iter() {
for (text_name, text) in line_strings.iter() {
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(*text_name, |bench| {
bench.iter(|| {
black_box(text.lines().count());
Expand All @@ -247,7 +313,8 @@ fn all(c: &mut Criterion) {
// lines_lf::from_byte_idx()
{
let mut group = c.benchmark_group("lines_lf::from_byte_idx");
for (text_name, text) in test_strings.iter() {
for (text_name, text) in line_strings.iter() {
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(*text_name, |bench| {
let idx = text.len();
bench.iter(|| {
Expand All @@ -261,6 +328,7 @@ fn all(c: &mut Criterion) {
{
let mut group = c.benchmark_group("lines_lf::to_byte_idx");
// NOTE(review): the other lines_lf groups shown in this diff iterate
// `line_strings`; this one still iterates `test_strings` — confirm
// whether that is intentional.
for (text_name, text) in test_strings.iter() {
// Throughput is declared as the byte length of the input text.
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(*text_name, |bench| {
// Computed once, outside the timed closure: one more than the
// value returned by lines_lf::count_breaks for this text.
let idx = lines_lf::count_breaks(text) + 1;
bench.iter(|| {
Expand All @@ -272,42 +340,59 @@ fn all(c: &mut Criterion) {

//---------------------------------------------------------
// Lines (CRLF).
// (label, break sequence) pairs used by the `lines_crlf::*` benchmark groups.
// Each group substitutes the sequence for every '\n' in the line texts and
// appends the label to the benchmark name ("{text_name}_{break_name}").
let crlf_line_breaks = [
("LF", "\u{000A}"), // line feed
("CR", "\u{000D}"), // carriage return
("CRLF", "\u{000D}\u{000A}"), // carriage return followed by line feed
];

// lines_crlf::count_breaks()
{
let mut group = c.benchmark_group("lines_crlf::count_breaks");
for (text_name, text) in test_strings.iter() {
group.bench_function(*text_name, |bench| {
bench.iter(|| {
black_box(lines_crlf::count_breaks(text));
})
});
for (text_name, text) in line_strings.iter() {
for (break_name, line_break) in crlf_line_breaks {
let text = &text.replace('\n', line_break);
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(format!("{text_name}_{break_name}"), |bench| {
bench.iter(|| {
black_box(lines_crlf::count_breaks(text));
})
});
}
}
}

// lines_crlf::from_byte_idx()
{
let mut group = c.benchmark_group("lines_crlf::from_byte_idx");
for (text_name, text) in test_strings.iter() {
group.bench_function(*text_name, |bench| {
let idx = text.len();
bench.iter(|| {
black_box(lines_crlf::from_byte_idx(text, idx));
})
});
for (text_name, text) in line_strings.iter() {
for (break_name, line_break) in crlf_line_breaks {
let text = &text.replace('\n', line_break);
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(format!("{text_name}_{break_name}"), |bench| {
let idx = text.len();
bench.iter(|| {
black_box(lines_crlf::from_byte_idx(text, idx));
})
});
}
}
}

// lines_crlf::to_byte_idx()
{
let mut group = c.benchmark_group("lines_crlf::to_byte_idx");
for (text_name, text) in test_strings.iter() {
group.bench_function(*text_name, |bench| {
let idx = lines_crlf::count_breaks(text) + 1;
bench.iter(|| {
black_box(lines_crlf::to_byte_idx(text, idx));
})
});
for (text_name, text) in line_strings.iter() {
for (break_name, line_break) in crlf_line_breaks {
let text = &text.replace('\n', line_break);
group.throughput(Throughput::Bytes(text.len() as u64));
group.bench_function(format!("{text_name}_{break_name}"), |bench| {
let idx = lines_crlf::count_breaks(text) + 1;
bench.iter(|| {
black_box(lines_crlf::to_byte_idx(text, idx));
})
});
}
}
}
}
Expand Down
5 changes: 5 additions & 0 deletions benches/text/lines.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
日本 is a West Germanic ー百 that
出典: spoken in early
medieval 日本 eventually
became commons england

0 comments on commit b4440c3

Please sign in to comment.