Skip to content

Commit

Permalink
benchmarks: Added CSV benchmark to bench suite
Browse files Browse the repository at this point in the history
Signed-off-by: Lucas Bajolet <[email protected]>
  • Loading branch information
lbajolet committed May 13, 2016
1 parent ea71df8 commit 56ef574
Show file tree
Hide file tree
Showing 8 changed files with 240 additions and 0 deletions.
103 changes: 103 additions & 0 deletions benchmarks/csv/csv_bench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/bin/bash
# This file is part of NIT ( http://www.nitlanguage.org ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Shell script to bench CSV parsers over different documents

source ../bench_common.sh
source ../bench_plot.sh

## CONFIGURATION OPTIONS ##

# Default number of times a command must be run with bench_command
# Can be overridden with the -n option
count=5

## HANDLE OPTIONS ##

# Compile the CSV generator and produce the benchmark inputs.
# For each document size, a plain and a `--unicode` document of
# 10 columns are written in the `inputs` directory.
function init_repo()
{
	mkdir -p inputs
	nitc --semi-global scripts/csv_gen.nit -o scripts/csv_gen
	local lines
	for lines in 1000 10000 100000 1000000; do
		echo "Generating $lines lines documents"
		./scripts/csv_gen 10 "$lines" "inputs/${lines}_l.csv"
		./scripts/csv_gen 10 "$lines" "inputs/${lines}_uni_l.csv" --unicode
	done
}

# Print the command-line help of this script.
function usage()
{
	echo "run_bench: ./csv_bench.sh [options]"
	echo " -v: verbose mode"
	echo " --fast: skip the generation of the input documents"
	echo " -n count: number of execution for each bar (default: $count)"
	echo " -h: this help"
}

# Consume the leading options; parsing stops at the first unknown argument.
stop=false
# `fast` always holds the string "true" or "false", never the empty string.
fast=false
while [ "$stop" = false ]; do
	case "$1" in
		-v) verbose=true; shift;;
		--fast) fast=true; shift;;
		-h) usage; exit;;
		-n) count="$2"; shift; shift;;
		*) stop=true
	esac
done

# (Re)generate the input documents unless --fast was given.
# The test compares against "false": a `-z` test can never succeed here
# because `fast` is never empty, which would skip the generation every time.
if [ "$fast" = false ]; then
	init_repo
fi

# Output directory of the benchmark results.
mkdir -p out

# Build the parsers that need a compilation step before being benched.
echo "Compiling engines"

echo "Java Parser"

# NOTE(review): compilation uses the jar under ./scripts whereas the run
# command of this script uses /usr/share/java/commons-csv.jar — confirm
# that both are present and are compatible versions.
javac -cp './scripts/commons-csv-1.3.jar' scripts/JavaCSV.java

echo "Go parser"

go build -o scripts/go_csv scripts/go_csv.go

echo "Nit/Ad-Hoc Parser"

nitc --semi-global scripts/nit_csv.nit -o scripts/nit_csv

# Display names of the benchmarked parsers and, at the same index, the
# command line that runs each of them on a CSV file given as last argument.
declare -a script_names=('Python 3 - Pandas' 'Python 2 - Pandas' 'Go' 'Nit' 'Python 3 - Standard' 'Python 2 - Standard' 'Java - Apache commons' 'Ruby')
declare -a script_cmds=('python3 scripts/python_csv.py' 'python2 scripts/python_csv.py' './scripts/go_csv' './scripts/nit_csv' 'python3 scripts/python_stdcsv.py' 'python2 scripts/python_stdcsv.py' "java -cp /usr/share/java/commons-csv.jar:. scripts.JavaCSV" 'ruby scripts/ruby_csv.rb')

# Bench every parser against every generated input document.
for parser_idx in "${!script_cmds[@]}"; do
	parser_name="${script_names[$parser_idx]}"
	parser_cmd="${script_cmds[$parser_idx]}"
	echo "Preparing res for $parser_name"
	prepare_res "./out/$parser_name.dat" "$parser_name" "$parser_name"
	for file in inputs/*.csv; do
		# NOTE(review): fname is not read in this script; kept in case the
		# sourced bench helpers use it — TODO confirm.
		fname=$(basename "$file" .csv)
		# $parser_cmd is deliberately unquoted: it must split into the
		# program and its arguments. The file path is quoted.
		bench_command "$file" "Benching file $file using $parser_cmd parser" $parser_cmd "$file"
	done
done

# Remove the binaries built for the benchmark.
rm scripts/nit_csv
rm scripts/JavaCSV.class
rm scripts/go_csv

plot out/bench_csv.gnu
18 changes: 18 additions & 0 deletions benchmarks/csv/scripts/JavaCSV.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package scripts;

import java.io.File;
import java.util.List;
import java.nio.charset.Charset;
import org.apache.commons.csv.*;

/**
 * Benchmark driver: parses the CSV file named by the first argument with
 * Apache Commons CSV (RFC 4180 format, UTF-8) and loads all its records.
 */
class JavaCSV {
	/**
	 * Parses the whole file and exits with status 1 on any failure.
	 *
	 * @param args args[0] is the path of the CSV file to parse
	 */
	public static void main(String[] args) {
		// try-with-resources closes the parser whether parsing succeeds or fails.
		try (CSVParser parser = CSVParser.parse(new File(args[0]), Charset.forName("UTF-8"), CSVFormat.RFC4180)) {
			// The records are only loaded, not used: the benchmark measures parsing.
			parser.getRecords();
		} catch(Exception e) {
			// Report the cause and fail the process, so that a failed parse
			// is not measured as a successful (and fast) run.
			System.err.println("Major fail: " + e);
			System.exit(1);
		}
	}
}
61 changes: 61 additions & 0 deletions benchmarks/csv/scripts/csv_gen.nit
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# This file is part of NIT ( http://www.nitlanguage.org ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import csv

# Command-line handling: number of cells per record, number of records,
# output path, and an optional `--unicode` flag as fourth argument.
if args.length < 3 then
	print "Usage ./csv_gen record_length record_nb out_filepath [--unicode]"
	exit 1
end

var record_length = args[0].to_i
var record_nb = args[1].to_i
var outpath = args[2]
var unicode = false

if args.length == 4 then
	if args[3] != "--unicode" then
		# Same usage line as for a missing argument, `out_filepath` included.
		print "Usage ./csv_gen record_length record_nb out_filepath [--unicode]"
		exit 1
	end
	unicode = true
end

# Document to fill then write; its end-of-line sequence is set to CRLF.
var ocsv = new CsvDocument
ocsv.eol = "\r\n"

# String forms of the special sequences of the document, injected into
# some cells below.
var sep = ocsv.separator.to_s
var eol = ocsv.eol
var del = ocsv.delimiter.to_s

# Header: one column name per cell, `Col0`, `Col1`, ...
for i in [0 .. record_length[ do ocsv.header.add "Col{i}"

# Character repeated to build the content of each cell.
var c = if unicode then "á" else "a"
for i in [0 .. record_nb[ do
var line = new Array[String].with_capacity(record_length)
for j in [0 .. record_length[ do
# Each special sequence is added according to its own `100.rand > 70` draw.
var add_sep = 100.rand > 70
var add_del = 100.rand > 70
var add_eol = 100.rand > 70
# Number of repetitions of `c` (presumably in [0, 10[ — confirm against `Int::rand`).
var ln = 10.rand
var s = c * ln
# The separator is prepended; the delimiter and the end-of-line are appended.
# NOTE(review): presumably meant to exercise the escaping done by the parsers — confirm.
if add_sep then s = sep + s
if add_del then s += del
if add_eol then s += eol
line.add s
end
ocsv.records.add line
end

# Write the whole document to the requested path.
ocsv.write_to_file(outpath)
18 changes: 18 additions & 0 deletions benchmarks/csv/scripts/go_csv.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package main

import "encoding/csv"
import "os"
import "fmt"

// main parses the CSV file named by the first command-line argument and
// loads all of its records in memory. It prints a usage line and exits when
// no argument is given, and panics when the file cannot be opened or is not
// valid CSV.
func main() {
	if len(os.Args) == 1 {
		fmt.Println("Usage ./go_csv file")
		os.Exit(-1)
	}
	file, err := os.Open(os.Args[1])
	if err != nil {
		panic(err)
	}
	defer file.Close()

	reader := csv.NewReader(file)
	// The records are discarded: the benchmark only measures parsing.
	if _, err := reader.ReadAll(); err != nil {
		// Panic with the parse error itself, not with the (nil) error
		// returned earlier by os.Open.
		panic(err)
	}
}
25 changes: 25 additions & 0 deletions benchmarks/csv/scripts/nit_csv.nit
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# This file is part of NIT ( http://www.nitlanguage.org ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import csv

# Benchmark driver: read the whole CSV file given as first argument.
if args.is_empty then
	print "Usage: ./nit_csv in.csv"
	exit 1
end

# The reader is set to the CRLF end-of-line sequence before reading.
var stream = new FileReader.open(args[0])
var reader = new CsvReader(stream)
reader.eol = "\r\n"

reader.read_all
4 changes: 4 additions & 0 deletions benchmarks/csv/scripts/python_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import sys
from pandas import read_csv

# Benchmark driver: parse the CSV file named by the first command-line
# argument with pandas.
input_path = sys.argv[1]
csv = read_csv(input_path)
8 changes: 8 additions & 0 deletions benchmarks/csv/scripts/python_stdcsv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import sys
import csv

def read_rows(path):
    """Parse the CSV file at `path` and return its records as a list of rows.

    The default `csv` dialect is used (comma separator, double-quote
    quoting), so that quoted cells holding separators, quotes or line
    breaks are parsed as single cells.
    """
    # `newline=''` is not passed to open(): this script is also run with
    # Python 2, whose built-in open() does not accept that argument.
    with open(path, 'r') as f:
        return list(csv.reader(f))


if __name__ == '__main__':
    # Benchmark driver: load every record of the file given as first argument.
    lst = read_rows(sys.argv[1])
3 changes: 3 additions & 0 deletions benchmarks/csv/scripts/ruby_csv.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
require 'csv'

# Benchmark driver: load and parse the whole CSV file named by the first
# command-line argument.
input_path = ARGV[0]
CSV.read(input_path)

0 comments on commit 56ef574

Please sign in to comment.