forked from nitlang/nit
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
benchmarks: Added CSV benchmark to bench suite
Signed-off-by: Lucas Bajolet <[email protected]>
- Loading branch information
Showing
8 changed files
with
240 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#!/bin/bash | ||
# This file is part of NIT ( http://www.nitlanguage.org ). | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Shell script to bench CSV parsers over different documents | ||
|
||
source ../bench_common.sh | ||
source ../bench_plot.sh | ||
|
||
## CONFIGURATION OPTIONS ## | ||
|
||
# Default number of times a command is run by bench_command. | ||
# Can be overridden with the option '-n'. | ||
count=5 | ||
|
||
## HANDLE OPTIONS ## | ||
|
||
# Compile the CSV generator, then produce the benchmark inputs.
# For every line count, one plain and one --unicode document is written
# to inputs/, each with 10 fields per record.
function init_repo()
{
	mkdir -p inputs
	nitc --semi-global scripts/csv_gen.nit -o scripts/csv_gen

	local nb_lines
	for nb_lines in 1000 10000 100000 1000000; do
		echo "Generating $nb_lines lines documents"
		./scripts/csv_gen 10 "$nb_lines" "inputs/${nb_lines}_l.csv"
		./scripts/csv_gen 10 "$nb_lines" "inputs/${nb_lines}_uni_l.csv" --unicode
	done
}
|
||
# Print the command-line help on stdout.
# Reads the global $count to display the current default for -n.
function usage()
{
	echo "run_bench: ./csv_bench.sh [options]"
	echo "  -v: verbose mode"
	echo "  --fast: do not (re)generate the input documents"
	echo "  -n count: number of execution for each bar (default: $count)"
	echo "  -h: this help"
}
|
||
# Consume leading options; the first unrecognised argument ends parsing.
stop=false
fast=false
while [ "$stop" = false ]; do
	case "$1" in
		-v) verbose=true; shift;;
		--fast) fast=true; shift;;
		-h) usage; exit;;
		# Two single shifts on purpose: `shift 2` shifts nothing when the
		# value is missing, which would make this loop spin forever.
		-n) count="$2"; shift; shift;;
		*) stop=true;;
	esac
done

# Generate the inputs unless --fast was given.
# $fast always holds "true" or "false", so it must be compared by value;
# an emptiness test (-z) can never succeed and init_repo would never run.
if [ "$fast" = false ]; then
	init_repo
fi
|
||
mkdir -p out

echo "Compiling engines"

echo "Java Parser"

# NOTE(review): compilation uses the bundled ./scripts/commons-csv-1.3.jar
# while the run command below uses /usr/share/java/commons-csv.jar;
# confirm both are expected to be present.
javac -cp './scripts/commons-csv-1.3.jar' scripts/JavaCSV.java

echo "Go parser"

go build -o scripts/go_csv scripts/go_csv.go

echo "Nit/Ad-Hoc Parser"

nitc --semi-global scripts/nit_csv.nit -o scripts/nit_csv

# Parallel arrays: script_names[i] is the display name of script_cmds[i].
declare -a script_names=('Python 3 - Pandas' 'Python 2 - Pandas' 'Go' 'Nit' 'Python 3 - Standard' 'Python 2 - Standard' 'Java - Apache commons' 'Ruby')
declare -a script_cmds=('python3 scripts/python_csv.py' 'python2 scripts/python_csv.py' './scripts/go_csv' './scripts/nit_csv' 'python3 scripts/python_stdcsv.py' 'python2 scripts/python_stdcsv.py' "java -cp /usr/share/java/commons-csv.jar:. scripts.JavaCSV" 'ruby scripts/ruby_csv.rb')

# Bench every engine against every generated document.
for idx in "${!script_cmds[@]}"; do
	engine_name="${script_names[$idx]}"
	engine_cmd="${script_cmds[$idx]}"
	echo "Preparing res for $engine_name"
	prepare_res "./out/$engine_name.dat" "$engine_name" "$engine_name"
	for file in inputs/*.csv; do
		# $engine_cmd is deliberately unquoted so that it word-splits into
		# the program and its arguments.
		bench_command "$file" "Benching file $file using $engine_cmd parser" $engine_cmd "$file"
	done
done

# Remove the build products created above.
rm scripts/nit_csv
rm scripts/JavaCSV.class
rm scripts/go_csv

plot out/bench_csv.gnu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
package scripts; | ||
|
||
import java.io.File; | ||
import java.util.List; | ||
import java.nio.charset.Charset; | ||
import org.apache.commons.csv.*; | ||
|
||
class JavaCSV { | ||
public static void main(String[] args) { | ||
try { | ||
File csvData = new File(args[0]); | ||
CSVParser parser = CSVParser.parse(csvData, Charset.forName("UTF-8"), CSVFormat.RFC4180); | ||
List<CSVRecord> r = parser.getRecords(); | ||
} catch(Exception e) { | ||
System.err.println("Major fail"); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# This file is part of NIT ( http://www.nitlanguage.org ). | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import csv | ||
|
||
# Command line: three mandatory arguments, then an optional `--unicode` flag.
if args.length < 3 then
	print "Usage ./csv_gen record_length record_nb out_filepath [--unicode]"
	exit 1
end

var record_length = args[0].to_i
var record_nb = args[1].to_i
var outpath = args[2]
var unicode = false

# Anything after the output path must be exactly one `--unicode`;
# extra or unknown arguments are rejected instead of silently ignored.
if args.length >= 4 then
	if args.length > 4 or args[3] != "--unicode" then
		print "Usage ./csv_gen record_length record_nb out_filepath [--unicode]"
		exit 1
	end
	unicode = true
end
|
||
# Build the whole document in memory, with CRLF as record terminator.
var doc = new CsvDocument
doc.eol = "\r\n"

# String forms of the special sequences, injected into some cells below.
var separator = doc.separator.to_s
var line_end = doc.eol
var delimiter = doc.delimiter.to_s

# Header row: Col0, Col1, ...
for col in [0 .. record_length[ do
	doc.header.add "Col{col}"
end

# Character repeated to fill each cell.
var filler = "a"
if unicode then filler = "á"

for row in [0 .. record_nb[ do
	var cells = new Array[String].with_capacity(record_length)
	for col in [0 .. record_length[ do
		# Each special sequence is drawn independently.
		var with_sep = 100.rand > 70
		var with_del = 100.rand > 70
		var with_eol = 100.rand > 70
		var width = 10.rand
		var cell = filler * width
		if with_sep then cell = separator + cell
		if with_del then cell += delimiter
		if with_eol then cell += line_end
		cells.add cell
	end
	doc.records.add cells
end

doc.write_to_file(outpath)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
package main | ||
|
||
import "encoding/csv" | ||
import "os" | ||
import "fmt" | ||
|
||
// main parses the CSV file named by the first command-line argument with
// encoding/csv and panics if the file cannot be opened or parsed.
func main() {
	if len(os.Args) == 1 {
		fmt.Println("Usage ./go_csv file")
		os.Exit(-1)
	}
	file, err := os.Open(os.Args[1])
	if err != nil {
		panic(err)
	}
	defer file.Close()

	reader := csv.NewReader(file)
	// Panic with the error returned by ReadAll itself; the error from
	// os.Open is known to be nil at this point.
	if _, err := reader.ReadAll(); err != nil {
		panic(err)
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# This file is part of NIT ( http://www.nitlanguage.org ). | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import csv | ||
|
||
# Benchmark program: read every record of the CSV file given as first argument.
if args.is_empty then
	print "Usage: ./nit_csv in.csv"
	exit 1
end

var input = new FileReader.open(args[0])
var reader = new CsvReader(input)
# Records are terminated by CRLF.
reader.eol = "\r\n"

reader.read_all
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
import sys | ||
from pandas import read_csv | ||
|
||
def main(argv=None):
    """Parse the CSV file named by the first argument with pandas.

    Kept free of Python-3-only syntax because the benchmark runs this
    script under both python2 and python3.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when no input file was given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.stderr.write("Usage: python_csv.py in.csv\n")
        return 1
    read_csv(args[0])
    return 0


if __name__ == "__main__":
    sys.exit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import sys | ||
import csv | ||
|
||
def read_rows(path, delimiter=':', quoting=csv.QUOTE_NONE):
    """Return every row of the file at *path* as a list of field lists.

    Kept free of Python-3-only syntax because the benchmark runs this
    script under both python2 and python3.

    NOTE(review): the defaults reproduce the dialect this script has
    always used (':' separator, no quote handling). The other parsers in
    this benchmark use their comma/quote defaults; confirm which dialect
    matches the generated inputs.

    Args:
        path: path of the file to read.
        delimiter: field separator handed to ``csv.reader``.
        quoting: quoting mode handed to ``csv.reader``.
    """
    with open(path, 'r') as f:
        return list(csv.reader(f, delimiter=delimiter, quoting=quoting))


def main(argv=None):
    """Read the file named by the first argument.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when no input file was given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.stderr.write("Usage: python_stdcsv.py in.csv\n")
        return 1
    read_rows(args[0])
    return 0


if __name__ == "__main__":
    sys.exit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
require 'csv' | ||
|
||
# Benchmark program: parse the CSV file given as first argument.
abort "Usage: ruby_csv.rb in.csv" if ARGV.empty?

CSV.read(ARGV.first)