diff --git a/benchmarks/csv/csv_bench.sh b/benchmarks/csv/csv_bench.sh
new file mode 100755
index 0000000000..badd1e6102
--- /dev/null
+++ b/benchmarks/csv/csv_bench.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Shell script to bench CSV parsers over different documents
+
+source ../bench_common.sh
+source ../bench_plot.sh
+
+## CONFIGURATION OPTIONS ##
+
+# Default number of times a command must be run with bench_command
+# Can be overridden with the option '-n'
+count=5
+
+## HANDLE OPTIONS ##
+
+# Compile the CSV generator, then create the input documents: for each
+# size, one plain document and one `--unicode` document of 10 fields per record.
+function init_repo()
+{
+	mkdir -p inputs
+	nitc --semi-global scripts/csv_gen.nit -o scripts/csv_gen
+	local nb_lines
+	for nb_lines in 1000 10000 100000 1000000; do
+		echo "Generating $nb_lines lines documents"
+		./scripts/csv_gen 10 "$nb_lines" "inputs/${nb_lines}_l.csv"
+		./scripts/csv_gen 10 "$nb_lines" "inputs/${nb_lines}_uni_l.csv" --unicode
+	done
+}
+
+# Print the command-line help.
+function usage()
+{
+	echo "run_bench: ./csv_bench.sh [options]"
+	echo " -v: verbose mode"
+	echo " --fast: skip the generation of the input documents"
+	echo " -n count: number of execution for each bar (default: $count)"
+	echo " -h: this help"
+}
+
+stop=false
+fast=false
+while [ "$stop" = false ]; do
+	case "$1" in
+		-v) verbose=true; shift;;
+		--fast) fast=true; shift;;
+		-h) usage; exit;;
+		-n) count="$2"; shift; shift;;
+		*) stop=true
+	esac
+done
+
+# Generate the inputs unless --fast was given. `fast` always holds "true"
+# or "false", so its value must be compared: a `-z` (empty string) test
+# never succeeds and the inputs would never be generated.
+if [ "$fast" = false ]; then
+	init_repo
+fi
+
+mkdir -p out
+
+echo "Compiling engines"
+
+echo "Java Parser"
+
+# NOTE(review): compiled against ./scripts/commons-csv-1.3.jar but run (see
+# script_cmds below) against /usr/share/java/commons-csv.jar -- confirm both exist.
+javac -cp './scripts/commons-csv-1.3.jar' scripts/JavaCSV.java
+
+echo "Go parser"
+
+go build -o scripts/go_csv scripts/go_csv.go
+
+echo "Nit/Ad-Hoc Parser"
+
+nitc --semi-global scripts/nit_csv.nit -o scripts/nit_csv
+
+declare -a script_names=('Python 3 - Pandas' 'Python 2 - Pandas' 'Go' 'Nit' 'Python 3 - Standard' 'Python 2 - Standard' 'Java - Apache commons' 'Ruby')
+declare -a script_cmds=('python3 scripts/python_csv.py' 'python2 scripts/python_csv.py' './scripts/go_csv' './scripts/nit_csv' 'python3 scripts/python_stdcsv.py' 'python2 scripts/python_stdcsv.py' "java -cp /usr/share/java/commons-csv.jar:. scripts.JavaCSV" 'ruby scripts/ruby_csv.rb')
+
+for ((script = 1; script <= ${#script_cmds[@]}; script++)); do
+	echo "Preparing res for ${script_names[$script - 1]}"
+	prepare_res "./out/${script_names[$script - 1]}.dat" "${script_names[$script - 1]}" "${script_names[$script - 1]}"
+	for file in inputs/*.csv; do
+		# NOTE(review): fname is not referenced anywhere in this script.
+		fname=$(basename "$file" .csv)
+		# The command is expanded unquoted on purpose: it must be split
+		# into the program and its arguments.
+		bench_command "$file" "Benching file $file using ${script_cmds[$script - 1]} parser" ${script_cmds[$script - 1]} "$file"
+	done
+done
+
+rm scripts/nit_csv
+rm scripts/JavaCSV.class
+rm scripts/go_csv
+
+plot out/bench_csv.gnu
diff --git a/benchmarks/csv/scripts/JavaCSV.java b/benchmarks/csv/scripts/JavaCSV.java
new file mode 100644
index 0000000000..f8264ca55a
--- /dev/null
+++ b/benchmarks/csv/scripts/JavaCSV.java
@@ -0,0 +1,27 @@
+package scripts;
+
+import java.io.File;
+import java.util.List;
+import java.nio.charset.Charset;
+import org.apache.commons.csv.*;
+
+// Benchmark driver: parses the CSV file given as first argument with
+// Apache Commons CSV (RFC 4180 format) and loads every record in memory.
+class JavaCSV {
+	public static void main(String[] args) {
+		if (args.length < 1) {
+			System.err.println("Usage: java scripts.JavaCSV file");
+			System.exit(1);
+		}
+		File csvData = new File(args[0]);
+		// try-with-resources: the parser is Closeable and must be released.
+		try (CSVParser parser = CSVParser.parse(csvData, Charset.forName("UTF-8"), CSVFormat.RFC4180)) {
+			List<CSVRecord> r = parser.getRecords();
+		} catch(Exception e) {
+			// Report the cause and exit with an error status so that a
+			// failed run is not mistaken for a valid timing.
+			System.err.println("Major fail: " + e);
+			System.exit(1);
+		}
+	}
+}
diff --git a/benchmarks/csv/scripts/csv_gen.nit b/benchmarks/csv/scripts/csv_gen.nit
new file mode 100644
index 0000000000..123dbb62aa
--- /dev/null
+++ b/benchmarks/csv/scripts/csv_gen.nit
@@ -0,0 +1,65 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Generate a CSV document of `record_nb` records of `record_length` fields
+# each, and write it to `out_filepath`; `--unicode` fills the fields with "á"
+# instead of "a".
+
+import csv
+
+if args.length < 3 then
+	print "Usage ./csv_gen record_length record_nb out_filepath [--unicode]"
+	exit 1
+end
+
+var record_length = args[0].to_i
+var record_nb = args[1].to_i
+var outpath = args[2]
+var unicode = false
+
+if args.length == 4 then
+	if not args[3] == "--unicode" then
+		print "Usage ./csv_gen record_length record_nb out_filepath [--unicode]"
+		exit 1
+	end
+	unicode = true
+end
+
+var ocsv = new CsvDocument
+ocsv.eol = "\r\n"
+
+var sep = ocsv.separator.to_s
+var eol = ocsv.eol
+var del = ocsv.delimiter.to_s
+
+for i in [0 .. record_length[ do ocsv.header.add "Col{i}"
+
+var c = if unicode then "á" else "a"
+for i in [0 .. record_nb[ do
+	var line = new Array[String].with_capacity(record_length)
+	for j in [0 .. record_length[ do
+		var add_sep = 100.rand > 70
+		var add_del = 100.rand > 70
+		var add_eol = 100.rand > 70
+		var ln = 10.rand
+		var s = c * ln
+		if add_sep then s = sep + s
+		if add_del then s += del
+		if add_eol then s += eol
+		line.add s
+	end
+	ocsv.records.add line
+end
+
+ocsv.write_to_file(outpath)
diff --git a/benchmarks/csv/scripts/go_csv.go b/benchmarks/csv/scripts/go_csv.go
new file mode 100644
index 0000000000..5fff932978
--- /dev/null
+++ b/benchmarks/csv/scripts/go_csv.go
@@ -0,0 +1,21 @@
+package main
+
+import "encoding/csv"
+import "os"
+import "fmt"
+
+// Benchmark driver: reads every record of the CSV file given as first argument.
+func main() {
+	if len(os.Args) == 1 {
+		fmt.Println("Usage ./go_csv file")
+		os.Exit(-1)
+	}
+	file, err := os.Open(os.Args[1])
+	if err != nil { panic(err) }
+	defer file.Close()
+
+	var read = csv.NewReader(file)
+	// Reuse `err` so that the panic reports the actual parse error.
+	_, err = read.ReadAll()
+	if err != nil { panic(err) }
+}
diff --git a/benchmarks/csv/scripts/nit_csv.nit b/benchmarks/csv/scripts/nit_csv.nit
new file mode 100644
index 0000000000..c8422d12d4
--- /dev/null
+++ b/benchmarks/csv/scripts/nit_csv.nit
@@ -0,0 +1,25 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import csv
+
+if args.is_empty then
+	print "Usage: ./nit_csv in.csv"
+	exit 1
+end
+
+var csv = new CsvReader(new FileReader.open(args[0]))
+csv.eol = "\r\n"
+
+csv.read_all
diff --git a/benchmarks/csv/scripts/python_csv.py b/benchmarks/csv/scripts/python_csv.py
new file mode 100644
index 0000000000..d8adddaafa
--- /dev/null
+++ b/benchmarks/csv/scripts/python_csv.py
@@ -0,0 +1,4 @@
+import sys
+from pandas import read_csv
+
+csv = read_csv(sys.argv[1])
diff --git a/benchmarks/csv/scripts/python_stdcsv.py b/benchmarks/csv/scripts/python_stdcsv.py
new file mode 100644
index 0000000000..b78cb15dc4
--- /dev/null
+++ b/benchmarks/csv/scripts/python_stdcsv.py
@@ -0,0 +1,14 @@
+import sys
+import csv
+
+# Use the default dialect (comma separator, double-quote quoting) so the
+# files are parsed like the RFC 4180 input the Java benchmark expects.
+# Quoted fields may contain "\r\n", so newline translation is disabled:
+# newline='' on Python 3, binary mode on Python 2 (as the csv docs require).
+if sys.version_info[0] >= 3:
+    f = open(sys.argv[1], 'r', newline='', encoding='utf-8')
+else:
+    f = open(sys.argv[1], 'rb')
+
+with f:
+    lst = list(csv.reader(f))
diff --git a/benchmarks/csv/scripts/ruby_csv.rb b/benchmarks/csv/scripts/ruby_csv.rb
new file mode 100644
index 0000000000..6b1fe02461
--- /dev/null
+++ b/benchmarks/csv/scripts/ruby_csv.rb
@@ -0,0 +1,3 @@
+require 'csv'
+
+CSV.read(ARGV.first)