diff --git a/Makefile b/Makefile index b80f1abe..a6962483 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,6 @@ DEPFLAGS += -MD -MF $(SRCDIR)/$*.d SOURCE := $(wildcard $(SRCDIR)/*.cpp) HEADERS += $(wildcard $(SRCDIR)/*.h) -HEADERS += $(wildcard $(SRCDIR)/*.hpp) OBJ := $(SOURCE:.cpp=.o) DEPS := $(SOURCE:.cpp=.d) @@ -32,11 +31,10 @@ DEPS := $(SOURCE:.cpp=.d) # Install variables SCRIPT_DIR = scripts/flitcli -DATA_DIR = $(SCRIPT_DIR)/data +DATA_DIR = data CONFIG_DIR = $(SCRIPT_DIR)/config DOC_DIR = documentation LITMUS_TESTS += $(wildcard litmus-tests/tests/*.cpp) -LITMUS_TESTS += $(wildcard litmus-tests/tests/*.hpp) LITMUS_TESTS += $(wildcard litmus-tests/tests/*.h) INSTALL_FLIT_CONFIG = $(PREFIX)/share/flit/scripts/flitconfig.py @@ -92,6 +90,7 @@ install: $(TARGET) mkdir -m 0755 -p $(PREFIX)/share/flit/scripts mkdir -m 0755 -p $(PREFIX)/share/flit/doc mkdir -m 0755 -p $(PREFIX)/share/flit/data/tests + mkdir -m 0755 -p $(PREFIX)/share/flit/data/db mkdir -m 0755 -p $(PREFIX)/share/flit/config mkdir -m 0755 -p $(PREFIX)/share/flit/litmus-tests ln -sf ../share/flit/scripts/flit.py $(PREFIX)/bin/flit @@ -105,6 +104,7 @@ install: $(TARGET) install -m 0644 $(DATA_DIR)/custom.mk $(PREFIX)/share/flit/data/ install -m 0644 $(DATA_DIR)/main.cpp $(PREFIX)/share/flit/data/ install -m 0644 $(DATA_DIR)/tests/Empty.cpp $(PREFIX)/share/flit/data/tests/ + install -m 0644 $(DATA_DIR)/db/tables-psql.sql $(PREFIX)/share/flit/data/db/ install -m 0644 $(CONFIG_DIR)/flit-default.toml.in $(PREFIX)/share/flit/config/ install -m 0644 $(LITMUS_TESTS) $(PREFIX)/share/flit/litmus-tests/ @echo "Generating $(INSTALL_FLIT_CONFIG)" diff --git a/scripts/flitcli/data/Makefile.in b/data/Makefile.in similarity index 70% rename from scripts/flitcli/data/Makefile.in rename to data/Makefile.in index 49ba813e..d4afe49a 100644 --- a/scripts/flitcli/data/Makefile.in +++ b/data/Makefile.in @@ -1,9 +1,10 @@ # Autogenerated Makefile using "flit update" -DEV_CC := {compiler} FFLAGS ?= DEV_TARGET ?= devrun DEV_CUTARGET ?= cu_devrun +GT_TARGET ?= gtrun +GT_OUT := ground-truth.csv UNAME_S := $(shell uname -s) @@ -22,6 +23,14 @@ DEV_CFLAGS += -Wextra DEV_CFLAGS += -Wuninitialized DEV_CFLAGS += -Wno-shift-count-overflow +DEV_CC ?= {dev_compiler} +DEV_OPTL ?= {dev_optl} +DEV_SWITCHES ?= {dev_switches} + +GT_CC := {ground_truth_compiler} +GT_OPTL := {ground_truth_optl} +GT_SWITCHES := {ground_truth_switches} + LD_REQUIRED += -lm LD_REQUIRED += -lstdc++ ifeq ($(UNAME_S),Darwin) # If we are on a Mac OSX system @@ -39,8 +48,10 @@ TESTS := $(wildcard tests/*.cpp) SOURCE := $(wildcard *.cpp) SOURCE += $(TESTS) -DEV_OBJ := $(SOURCE:%.cpp=%_dev.o) -DEV_DEPS := $(SOURCE:%.cpp=%_dev.d) +DEV_OBJ = $(SOURCE:%.cpp=%_dev.o) +DEV_DEPS = $(SOURCE:%.cpp=%_dev.d) +GT_OBJ = $(SOURCE:%.cpp=%_gt.o) +GT_DEPS = $(SOURCE:%.cpp=%_gt.d) CLANG := clang++ INTEL := icpc @@ -194,18 +205,21 @@ SWITCHES_INTEL += USEFASTM TARGETS := $(foreach c, $(COMPILERS), \ $(foreach s, $(SWITCHES_$(strip $c)), \ $(foreach o, $(OPCODES), \ - $c_$(HOSTNAME)_$(strip $(s))_$(strip $(o))_out \ + $(RESULTS_DIR)/$c_$(HOSTNAME)_$(strip $s)_$(strip $o)_out.csv \ ) \ ) \ ) -BIN := $(TARGETS:%_out=%) -OBJ = $(foreach b,$(BIN),$(SOURCE:%.cpp=%_$(strip $(b)).o)) +BIN := $(TARGETS:%_out.csv=%) +OBJ = $(foreach b,$(BIN),$(SOURCE:%.cpp=%_$(notdir $b).o)) ################################################## # # Now deal with CUDA stuff if it is even available # ################################################## + +# TODO: double check CUDA flags. Really? No optimization levels? 
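# Illustration only (hypothetical values, not lines added by this patch): once
# "flit update" renders this template from flit-config.toml, the {dev_*} and
# {ground_truth_*} placeholders above become concrete settings, for example:
#
#   DEV_CC       ?= g++
#   DEV_OPTL     ?= -O2
#   DEV_SWITCHES ?= -funsafe-math-optimizations
#   GT_CC        := g++
#   GT_OPTL      := -O0
#   GT_SWITCHES  :=
#
# The actual compiler, optimization level, and switches come from the user's
# flit-config.toml; the values shown here are only an example.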
+ NVCC_BIN := nvcc NVCC := $(shell which $(NVCC_BIN)) CUDA_DIR := $(dir $(NVCC))/.. @@ -272,13 +286,12 @@ CUSWITCHES += PRECSFC CUSWITCHES += PRECSTC CUTARGETS := $(foreach s, $(CUSWITCHES), \ - NVCC_$(HOSTNAME)_$(strip $(s))_out) -CUBIN := $(CUTARGETS:%_out=%) -CUOBJ := $(foreach b,$(CUBIN),$(CUSOURCE:%.cpp=%_$b.o)) + $(RESULTS_DIR)/NVCC_$(HOSTNAME)_$(strip $(s))_out.csv) +CUBIN := $(CUTARGETS:%_out.csv=%) +CUOBJ := $(foreach b,$(CUBIN),$(CUSOURCE:%.cpp=%_$(notdir $b).o)) endif # ifdef HAS_CUDA - .PHONY: help help: @echo 'You can run the Makefile directly, but it is recommended to use' @@ -293,42 +306,50 @@ help: @echo ' help Show this help and exit (default target)' @echo ' dev Only run the devel compilation to test things out' @echo ' devcuda Only run the devel CUDA compilation to test CUDA out' + @echo ' groundtruth Compile the ground-truth version and get its output' + @echo ' gt Same as groundtruth' @echo ' run Run all combinations of compilation, results in results/' @echo ' clean Clean intermediate files' @echo ' veryclean Runs clean + removes targets and results' @echo ' distclean Same as veryclean' @echo -.PHONY: dev devcuda run +.PHONY: dev devcuda gt groundtruth run dev: $(DEV_TARGET) devcuda: $(DEV_CUTARGET) -run: $(TARGETS) $(CUTARGETS) cleanResults archive +gt: $(GT_TARGET) $(GT_OUT) +groundtruth: $(GT_TARGET) $(GT_OUT) + +run: $(TARGETS) $(CUTARGETS) .PHONY: clean clean: rm -f $(DEV_OBJ) rm -f $(DEV_DEPS) rm -f $(DEV_CUOBJ) - rm -f $(TARGETS) - rm -f $(CUTARGETS) - rm -f $(BIN) rm -f $(OBJ) - rm -f $(CUBIN) rm -f $(CUOBJ) + rm -f $(GT_OBJ) + rm -f $(GT_DEPS) .PHONY: veryclean distclean veryclean: distclean distclean: clean rm -f $(DEV_TARGET) rm -f $(DEV_CUTARGET) - rm -f $(TARGETS:%=$(RESULTS_DIR)/%) - rm -f $(BIN:%=$(RESULTS_DIR)/%) - rm -f $(CUTARGETS:%=$(RESULTS_DIR)/%) - rm -f $(CUBIN:%=$(RESULTS_DIR)/%) + rm -f $(TARGETS) + rm -f $(addsuffix *.dat,$(TARGETS)) + rm -f $(BIN) + rm -f $(CUTARGETS) + rm -f $(addsuffix *.dat,$(CUTARGETS)) + rm -f $(CUBIN) + rm -f $(GT_TARGET) + rm -f $(GT_OUT) + rm -f $(addsuffix *.dat,$(GT_OUT)) -rmdir $(RESULTS_DIR) .PRECIOUS: %.d --include $(SOURCE:%.cpp=%.d) +-include $(SOURCE:%.cpp=%.d) $(DEV_DEPS) $(GT_DEPS) Makefile: flit-config.toml $(dir $(FLIT_SCRIPT))/flit_update.py $(FLIT_SCRIPT) update @@ -345,6 +366,7 @@ cleanlibflit: rm -rf lib $(DEV_TARGET): lib/libflit.so +$(GT_TARGET): lib/libflit.so $(BIN): lib/libflit.so $(CUBIN): lib/libflit.so endif # ifeq ($(UNAME_S),Darwin): meaning, we are on a mac @@ -360,7 +382,12 @@ $(DEV_TARGET): $(DEV_OBJ) Makefile -o $@ $(DEV_OBJ) $(LD_REQUIRED) $(DEV_LDFLAGS) %_dev.o: %.cpp Makefile - $(DEV_CC) $(CC_REQUIRED) $(DEV_CFLAGS) $(DEPFLAGS) -c $< -o $@ + $(DEV_CC) $(DEV_OPTL) $(DEV_SWITCHES) $(CC_REQUIRED) $(DEV_CFLAGS) $(DEPFLAGS) -c $< -o $@ \ + -DFLIT_HOST='"$(HOSTNAME)"' \ + -DFLIT_COMPILER='"$(DEV_CC)"' \ + -DFLIT_OPTL='"$(DEV_OPTL)"' \ + -DFLIT_SWITCHES='"$(DEV_SWITCHES)"' \ + -DFLIT_FILENAME='"$(notdir $(DEV_TARGET))"' ifdef HAS_CUDA $(DEV_CUTARGET): $(DEV_CUOBJ) Makefile @@ -370,10 +397,27 @@ $(DEV_CUTARGET): $(DEV_CUOBJ) Makefile $(NVCC) -c $(NVCC_CFLAGS) $(DEV_NVCC_CC) $< -o $@ endif # ifdef HAS_CUDA +# Ground truth compilation rules +$(GT_OUT): $(GT_TARGET) + ./$(GT_TARGET) --output $(GT_OUT) + +$(GT_TARGET): $(GT_OBJ) Makefile + $(GT_CC) $(CC_REQUIRED) -o $@ $(GT_OBJ) $(LD_REQUIRED) + +%_gt.o: %.cpp Makefile + $(GT_CC) $(GT_OPTL) $(GT_SWITCHES) $(CC_REQUIRED) $(DEPFLAGS) -c $< -o $@ \ + -DFLIT_HOST='"$(HOSTNAME)"' \ + -DFLIT_COMPILER='"$(GT_CC)"' \ + -DFLIT_OPTL='"$(GT_OPTL)"' \ + 
-DFLIT_SWITCHES='"$(GT_SWITCHES)"' \ + -DFLIT_FILENAME='"$(notdir $(GT_TARGET))"' + # Now the true magic begins for the full run compilation rules # very cool: recyclable target definitions! adapted from # http://make.mad-scientist.net/the-eval-function/ +# TODO: try without PERCENT variable +PERCENT := % # Generates rules for # 1. compiling @@ -386,34 +430,34 @@ endif # ifdef HAS_CUDA # @param $2: variable name containing the compiler to use (e.g. GCC) # @param $3: variable name containing the optimization level (e.g. O2) define TARGETS_RULE -#run test and collect results -$(strip $2)_$(HOSTNAME)_$(strip $1)_$(strip $3)_out : $2_$(HOSTNAME)_$(strip $1)_$(strip $3) - -./$$< --output $$@ - -sed -i -e 's/HOST/$(HOSTNAME)/g' $$@ - -sed -i -e 's/SWITCHES/$($(strip $1))/g' $$@ - -sed -i -e 's/OPTL/$($(strip $3))/g' $$@ - -sed -i -e 's/COMPILER/$($(strip $2))/g' $$@ - -sed -i -e 's/FILENAME/$$= 0), -- timing for the function + + foreign key(run) references runs(id) + ); + +-- Tables not created: +-- * clusters +-- * op_counts +-- * opcodes +-- * skipped_pin +-- * switch_conv +-- * switch_desc +-- Do we need these tables? I don't know. diff --git a/scripts/flitcli/data/main.cpp b/data/main.cpp similarity index 56% rename from scripts/flitcli/data/main.cpp rename to data/main.cpp index 6b4798bc..60a4a640 100644 --- a/scripts/flitcli/data/main.cpp +++ b/data/main.cpp @@ -1,5 +1,5 @@ #include "flit.h" int main(int argCount, char* argList[]) { - return runFlitTests(argCount, argList); + return flit::runFlitTests(argCount, argList); } diff --git a/scripts/flitcli/data/tests/Empty.cpp b/data/tests/Empty.cpp similarity index 55% rename from scripts/flitcli/data/tests/Empty.cpp rename to data/tests/Empty.cpp index 186d846c..713bb1bd 100644 --- a/scripts/flitcli/data/tests/Empty.cpp +++ b/data/tests/Empty.cpp @@ -1,18 +1,17 @@ -#include "flit.h" +#include #include template GLOBAL -void Empty_kernel(const flit::CuTestInput* tiList, flit::CudaResultElement* results) { +void Empty_kernel(const flit::CuTestInput* tiList, double* results) { #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif auto& ti = tiList[idx]; - results[idx].s1 = ti.vals[0]; - results[idx].s2 = 0.0; + results[idx] = ti.vals[0]; } /** An example test class to show how to make FLiT tests @@ -30,7 +29,7 @@ class Empty : public flit::TestBase { * Can be zero. If it is zero, then getDefaultInput should return an empty * TestInput object which is as simple as "return {};" */ - virtual size_t getInputsPerRun() { return 1; } + virtual size_t getInputsPerRun() override { return 1; } /** Specify the default inputs for your test. * @@ -41,12 +40,44 @@ class Empty : public flit::TestBase { * If your algorithm takes no inputs, then you can simply return an empty * TestInput object. It is as simple as "return {};". */ - flit::TestInput getDefaultInput() { + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { 1.0 }; return ti; } + /** Custom comparison methods + * + * These comparison operations are meant to create a metric between the test + * results from this test in the current compilation, and the results from + * the ground truth compilation. You can do things like the relative error + * or the absolute error (for the case of long double). + * + * The below specified functions are the default implementations defined in + * the base class. It is safe to delete these two functions if this + * implementation is adequate for you. 
+ * + * Which one is used depends on the type of Variant that is returned from the + * run_impl function. The value returned by compare will be the value stored + * in the database for later analysis. + * + * Note: when using the CUDA kernel functionality, only long double return + * values are valid for now. + */ + virtual long double compare(long double ground_truth, + long double test_results) const override { + // absolute error + return test_results - ground_truth; + } + + /** There is no good default implementation comparing two strings */ + virtual long double compare(const std::string &ground_truth, + const std::string &test_results) const override { + FLIT_UNUSED(ground_truth); + FLIT_UNUSED(test_results); + return 0.0; + } + protected: /** Return a kernel pointer to the CUDA kernel equivalent of run_impl * @@ -60,7 +91,7 @@ class Empty : public flit::TestBase { * See the documentation above Empty_kernel() for details about what the * kernel is expected to have. */ - virtual flit::KernelFunction* getKernel() { return Empty_kernel; } + virtual flit::KernelFunction* getKernel() override { return Empty_kernel; } /** Call or implement the algorithm here. * @@ -71,8 +102,8 @@ class Empty : public flit::TestBase { * You are guarenteed that ti will have exactly getInputsPerRun() inputs in * it. If getInputsPerRun() returns zero, then ti.vals will be empty. */ - virtual flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { - return {std::pair(ti.vals[0], 0.0), 0}; + virtual flit::Variant run_impl(const flit::TestInput& ti) override { + return ti.vals[0]; } protected: diff --git a/documentation/available-compiler-flags.md b/documentation/available-compiler-flags.md index 00783836..9556f47f 100644 --- a/documentation/available-compiler-flags.md +++ b/documentation/available-compiler-flags.md @@ -17,8 +17,8 @@ In your configuration file `flit-config.toml` (see [FLiT Configuration File](flit-configuration-file.md)), you specify compiler flags for each of the compilers. Only one compiler flag will be used with one optimization level. If you want to have specific flag combinations, you can place it in the list, -such as `"-mavx2 -mfma -ffastmath"`. Below is the original default list for -the supported compilers: +such as `"-mavx2 -mfma -funsafe-math-optimizations"`. Below is the original +default list for the supported compilers: | Flag | GCC | Clang | Intel | NVCC | | ----------------------------- |:-----:|:-----:|:-----:|:----:| diff --git a/gensrc/testcase.py b/gensrc/testcase.py index cee1d918..b4772de4 100644 --- a/gensrc/testcase.py +++ b/gensrc/testcase.py @@ -9,21 +9,20 @@ # - default_input: populate ti.vals vector. # - vars_initialize: initialize scope variable for the test using ti.vals # - cu_vars_initialize: initialize scope variables for the test in CUDA using tiList[idx].vals -# - func_body: test body that is shared between cuda and non-cuda. Populate score1 and score2 +# - func_body: test body that is shared between cuda and non-cuda. 
Populate score template_string = ''' #include "flit.h" template GLOBAL void -{name}Kernel(const flit::CuTestInput* tiList, flit::CudaResultElement* results) {{ +{name}Kernel(const flit::CuTestInput* tiList, double* results) {{ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - decltype(results->s1) score1 = 0.0; - decltype(results->s2) score2 = 0.0; + double score = 0.0; {cu_vars_initialize} @@ -31,8 +30,7 @@ {func_body} }} - results[idx].s1 = score1; - results[idx].s2 = score2; + results[idx] = score; }} template @@ -41,8 +39,8 @@ class {name} : public flit::TestBase {{ {name}(std::string id) : flit::TestBase(std::move(id)) {{}} - virtual size_t getInputsPerRun() {{ return {input_count}; }} - virtual flit::TestInput getDefaultInput() {{ + virtual size_t getInputsPerRun() override {{ return {input_count}; }} + virtual flit::TestInput getDefaultInput() override {{ flit::TestInput ti; {default_input} @@ -51,14 +49,13 @@ class {name} : public flit::TestBase {{ }} protected: - virtual flit::KernelFunction* getKernel() {{ + virtual flit::KernelFunction* getKernel() override {{ return {name}Kernel; }} virtual - flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) {{ - T score1 = 0.0; - T score2 = 0.0; + flit::Variant run_impl(const flit::TestInput& ti) override {{ + T score = 0.0; flit::info_stream << id << ": Starting test with parameters" << std::endl; for (T val : ti.vals) {{ @@ -71,9 +68,9 @@ class {name} : public flit::TestBase {{ {func_body} - flit::info_stream << id << ": Ending test with values (" << score1 << ", " << score2 << ")" << std::endl; + flit::info_stream << id << ": Ending test with value (" << score << ")" << std::endl; - return {{std::pair(score1, score2), 0}}; + return score; }} protected: @@ -104,8 +101,6 @@ def __init__(self, name, default_input_vals): # Create an environment for the function body env = Environment({ - #'score1': Variable('score1', 'T'), - #'score2': Variable('score2', 'T'), }) var_list = [Variable('in_{0}'.format(i+1), 'T') for i in range(self.input_count)] env.update(zip([x.name for x in var_list], var_list)) @@ -116,8 +111,7 @@ def __init__(self, name, default_input_vals): var = Variable('e{0}'.format(i+1), 'T') self.func_body_lines.append('{0} {1} = {2};'.format(var.type, var.name, random_expression(env, 3))) env[var.name] = var - self.func_body_lines.append('score1 = {0};'.format(random_expression(env, 4, vars_only=True))) - self.func_body_lines.append('score2 = {0};'.format(random_expression(env, 4, vars_only=True))) + self.func_body_lines.append('score = {0};'.format(random_expression(env, 4, vars_only=True))) def write(self, directory='.'): ''' diff --git a/inputGen/main.cpp b/inputGen/main.cpp index 65f7c019..4d5ae58b 100644 --- a/inputGen/main.cpp +++ b/inputGen/main.cpp @@ -1,9 +1,8 @@ #include "helper.h" #include "groundtruth.h" -#include "TestBase.hpp" -//#include "testbed.h" -#include +#include + #include #include #include @@ -13,6 +12,8 @@ #include #include +#include + #include // For dlopen(), dlsym() and dlclose() //TESTRUN_DEFINE(distribution, 3, RandomFloatType::Positive) diff --git a/litmus-tests/disabled/SimpleCHull.cpp b/litmus-tests/disabled/SimpleCHull.cpp index a77af2ac..82ae6580 100644 --- a/litmus-tests/disabled/SimpleCHull.cpp +++ b/litmus-tests/disabled/SimpleCHull.cpp @@ -17,15 +17,13 @@ class SimpleCHull: public flit::TestBase { protected: virtual flit::KernelFunction* getKernel() { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const 
flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) { FLIT_UNUSED(ti); CHullEdges.clear(); PointList.clear(); ReadInputs(fopen("data/random_input", "r")); SimpleComputeConvexhull(); - return {std::pair((long double) - getEdgeCount(), 0.0), 0}; + return getEdgeCount(); } protected: diff --git a/litmus-tests/tests/DistributivityOfMultiplication.cpp b/litmus-tests/tests/DistributivityOfMultiplication.cpp index 3a83c539..2c0a3e0d 100644 --- a/litmus-tests/tests/DistributivityOfMultiplication.cpp +++ b/litmus-tests/tests/DistributivityOfMultiplication.cpp @@ -13,7 +13,7 @@ template GLOBAL void -DistOfMultKernel(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +DistOfMultKernel(const flit::CuTestInput* tiList, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -24,10 +24,7 @@ DistOfMultKernel(const flit::CuTestInput* tiList, flit::CudaResultElement* re T c = tiList[idx].vals[2]; auto distributed = (a * c) + (b * c); - auto undistributed = (a + b) * c; - results[idx].s1 = distributed; - results[idx].s2 = undistributed; - + results[idx] = distributed; } template @@ -36,32 +33,28 @@ class DistributivityOfMultiplication : public flit::TestBase { DistributivityOfMultiplication(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 3; } - virtual flit::TestInput getDefaultInput(); + virtual size_t getInputsPerRun() override { return 3; } + virtual flit::TestInput getDefaultInput() override; protected: - virtual flit::KernelFunction* getKernel() { + virtual flit::KernelFunction* getKernel() override { return DistOfMultKernel; } - virtual - flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { T a = ti.vals[0]; T b = ti.vals[1]; T c = ti.vals[2]; auto distributed = (a * c) + (b * c); - auto undistributed = (a + b) * c; flit::info_stream << std::setw(8); flit::info_stream << id << ": (a,b,c) = (" << a << "," << b << "," << c << ")" << std::endl; flit::info_stream << id << ": dist = " << distributed << std::endl; - flit::info_stream << id << ": undist = " - << undistributed << std::endl; - return {std::pair(distributed, undistributed), 0}; + return distributed; } protected: diff --git a/litmus-tests/tests/DoHariGSBasic.cpp b/litmus-tests/tests/DoHariGSBasic.cpp index 6494363b..ac546d65 100644 --- a/litmus-tests/tests/DoHariGSBasic.cpp +++ b/litmus-tests/tests/DoHariGSBasic.cpp @@ -7,7 +7,7 @@ template GLOBAL void -DoHGSBTestKernel(const flit::CuTestInput* tiList, flit::CudaResultElement* result){ +DoHGSBTestKernel(const flit::CuTestInput* tiList, double* result){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -30,8 +30,7 @@ DoHGSBTestKernel(const flit::CuTestInput* tiList, flit::CudaResultElement* re double score = std::abs(o12) + std::abs(o13) + std::abs(o23); - result[idx].s1 = score; - result[idx].s2 = 0; + result[idx] = score; } template @@ -39,13 +38,13 @@ class DoHariGSBasic: public flit::TestBase { public: DoHariGSBasic(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 9; } - virtual flit::TestInput getDefaultInput(); + virtual size_t getInputsPerRun() override { return 9; } + virtual flit::TestInput getDefaultInput() override; protected: - virtual flit::KernelFunction* getKernel() { return DoHGSBTestKernel; } - virtual - flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual 
flit::KernelFunction* getKernel() override { return DoHGSBTestKernel; } + + virtual flit::Variant run_impl(const flit::TestInput& ti) override { using flit::operator<<; long double score = 0.0; @@ -83,7 +82,7 @@ class DoHariGSBasic: public flit::TestBase { << flit::as_int(score) << std::endl; flit::info_stream << id << ": score (dec): " << score << std::endl; } - return {std::pair(score, 0.0), 0}; + return score; } protected: diff --git a/litmus-tests/tests/DoHariGSImproved.cpp b/litmus-tests/tests/DoHariGSImproved.cpp index 30280684..59831214 100644 --- a/litmus-tests/tests/DoHariGSImproved.cpp +++ b/litmus-tests/tests/DoHariGSImproved.cpp @@ -6,7 +6,7 @@ template GLOBAL void -DoHGSITestKernel(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +DoHGSITestKernel(const flit::CuTestInput* tiList, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -28,8 +28,7 @@ DoHGSITestKernel(const flit::CuTestInput* tiList, flit::CudaResultElement* re double score = std::abs(o12) + std::abs(o13) + std::abs(o23); - results[idx].s1 = score; - results[idx].s2 = 0; + results[idx] = score; } template @@ -37,13 +36,12 @@ class DoHariGSImproved: public flit::TestBase { public: DoHariGSImproved(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 9; } - virtual flit::TestInput getDefaultInput(); + virtual size_t getInputsPerRun() override { return 9; } + virtual flit::TestInput getDefaultInput() override; protected: - virtual flit::KernelFunction* getKernel() { return DoHGSITestKernel; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::KernelFunction* getKernel() override { return DoHGSITestKernel; } + virtual flit::Variant run_impl(const flit::TestInput& ti) override { long double score = 0.0; //matrix = {a, b, c}; @@ -70,7 +68,7 @@ class DoHariGSImproved: public flit::TestBase { flit::info_stream << id << ": r3: " << r3 << std::endl; flit::info_stream << id << ": w dot prods: " << o12 << ", " << o13 << ", " << o23 << std::endl; } - return {std::pair(score, 0.0l), 0l}; + return score; } protected: diff --git a/litmus-tests/tests/DoMatrixMultSanity.cpp b/litmus-tests/tests/DoMatrixMultSanity.cpp index 66b76930..3c1f14d2 100644 --- a/litmus-tests/tests/DoMatrixMultSanity.cpp +++ b/litmus-tests/tests/DoMatrixMultSanity.cpp @@ -9,7 +9,7 @@ template GLOBAL void -DoMatrixMultSanityKernel(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +DoMatrixMultSanityKernel(const flit::CuTestInput* tiList, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -18,8 +18,7 @@ DoMatrixMultSanityKernel(const flit::CuTestInput* tiList, flit::CudaResultEle auto ti = tiList[idx]; auto b = flit::VectorCU(ti.vals, ti.length); auto c = flit::MatrixCU::Identity(ti.length) * b; - results[idx].s1 = c.L1Distance(b); - results[idx].s2 = c.LInfDistance(b); + results[idx] = c.L1Distance(b); } template @@ -27,9 +26,9 @@ class DoMatrixMultSanity: public flit::TestBase { public: DoMatrixMultSanity(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 16; } + virtual size_t getInputsPerRun() override { return 16; } - virtual flit::TestInput getDefaultInput() { + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.highestDim = getInputsPerRun(); ti.min = -6; @@ -40,16 +39,16 @@ class DoMatrixMultSanity: public flit::TestBase { } protected: - virtual flit::KernelFunction* getKernel() { 
return DoMatrixMultSanityKernel; } - virtual - flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::KernelFunction* getKernel() override { return DoMatrixMultSanityKernel; } + + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto dim = ti.vals.size(); flit::Vector b(ti.vals); auto c = flit::Matrix::Identity(dim) * b; bool eq = (c == b); flit::info_stream << id << ": Product is: " << c << std::endl; flit::info_stream << id << ": A * b == b? " << eq << std::endl; - return {std::pair(c.L1Distance(b), c.LInfDistance(b)), 0}; + return c.L1Distance(b); } protected: diff --git a/litmus-tests/tests/DoOrthoPerturbTest.cpp b/litmus-tests/tests/DoOrthoPerturbTest.cpp index b7a6a55e..36cd7c75 100644 --- a/litmus-tests/tests/DoOrthoPerturbTest.cpp +++ b/litmus-tests/tests/DoOrthoPerturbTest.cpp @@ -8,7 +8,7 @@ template GLOBAL void -DoOPTKernel(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +DoOPTKernel(const flit::CuTestInput* tiList, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -50,8 +50,7 @@ DoOPTKernel(const flit::CuTestInput* tiList, flit::CudaResultElement* results } p = backup; } - results[idx].s1 = score; - results[idx].s2 = 0; + results[idx] = score; } template @@ -59,8 +58,8 @@ class DoOrthoPerturbTest : public flit::TestBase { public: DoOrthoPerturbTest(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 16; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 16; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.iters = 200; ti.ulp_inc = 1; @@ -74,9 +73,9 @@ class DoOrthoPerturbTest : public flit::TestBase { } protected: - virtual flit::KernelFunction* getKernel() { return DoOPTKernel; } - virtual - flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::KernelFunction* getKernel() override { return DoOPTKernel; } + + virtual flit::Variant run_impl(const flit::TestInput& ti) override { using flit::operator<<; auto iters = ti.iters; @@ -142,7 +141,7 @@ class DoOrthoPerturbTest : public flit::TestBase { << std::endl; cdim++; } - return {std::pair(score, 0.0), 0}; + return score; } private: diff --git a/litmus-tests/tests/DoSimpleRotate90.cpp b/litmus-tests/tests/DoSimpleRotate90.cpp index 043104ae..25b81a8d 100644 --- a/litmus-tests/tests/DoSimpleRotate90.cpp +++ b/litmus-tests/tests/DoSimpleRotate90.cpp @@ -7,7 +7,7 @@ template GLOBAL void -DoSR90Kernel(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +DoSR90Kernel(const flit::CuTestInput* tiList, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -21,8 +21,7 @@ DoSR90Kernel(const flit::CuTestInput* tiList, flit::CudaResultElement* result auto done = A.rotateAboutZ_3d(M_PI/2); - results[idx].s1 = done.L1Distance(expected); - results[idx].s2 = done.LInfDistance(expected); + results[idx] = done.L1Distance(expected); } template @@ -30,16 +29,17 @@ class DoSimpleRotate90: public flit::TestBase { public: DoSimpleRotate90(std::string id):flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 3; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 3; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { 1, 1, 1 }; return ti; } - virtual flit::KernelFunction* getKernel() { return DoSR90Kernel; } protected: - 
flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::KernelFunction* getKernel() override { return DoSR90Kernel; } + + virtual flit::Variant run_impl(const flit::TestInput& ti) override { flit::Vector A(ti.vals); flit::Vector expected = {-A[1], A[0], A[2]}; flit::info_stream << "Rotating A: " << A << ", 1/2 PI radians" << std::endl; @@ -47,9 +47,7 @@ class DoSimpleRotate90: public flit::TestBase { flit::info_stream << "Resulting vector: " << A << std::endl; flit::info_stream << "in " << id << std::endl; A.dumpDistanceMetrics(expected, flit::info_stream); - return {std::pair(A.L1Distance(expected), - A.LInfDistance(expected)), - 0}; + return A.L1Distance(expected); } protected: diff --git a/litmus-tests/tests/DoSkewSymCPRotationTest.cpp b/litmus-tests/tests/DoSkewSymCPRotationTest.cpp index 524bd2f7..b70d40be 100644 --- a/litmus-tests/tests/DoSkewSymCPRotationTest.cpp +++ b/litmus-tests/tests/DoSkewSymCPRotationTest.cpp @@ -7,7 +7,7 @@ template GLOBAL void -DoSkewSCPRKernel(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +DoSkewSCPRKernel(const flit::CuTestInput* tiList, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -23,8 +23,7 @@ DoSkewSCPRKernel(const flit::CuTestInput* tiList, flit::CudaResultElement* re auto rMatrix = flit::MatrixCU::Identity(3) + sscpm + (sscpm * sscpm) * ((1 - cos)/(sine * sine)); auto result = rMatrix * A; - results[idx].s1 = result.L1Distance(B); - results[idx].s1 = result.LInfDistance(B); + results[idx] = result.L1Distance(B); } template @@ -33,8 +32,8 @@ class DoSkewSymCPRotationTest: public flit::TestBase { DoSkewSymCPRotationTest(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 6; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 6; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.min = -6; ti.max = 6; @@ -45,13 +44,11 @@ class DoSkewSymCPRotationTest: public flit::TestBase { } protected: - virtual flit::KernelFunction* getKernel() { return DoSkewSCPRKernel;} + virtual flit::KernelFunction* getKernel() override { return DoSkewSCPRKernel;} - virtual - flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { flit::info_stream << "entered " << id << std::endl; long double L1Score = 0.0; - long double LIScore = 0.0; flit::Vector A = { ti.vals[0], ti.vals[1], ti.vals[2] }; flit::Vector B = { ti.vals[3], ti.vals[4], ti.vals[5] }; A = A.getUnitVector(); @@ -76,7 +73,6 @@ class DoSkewSymCPRotationTest: public flit::TestBase { flit::info_stream << "rotator: " << std::endl << rMatrix << std::endl; if(!(result == B)){ L1Score = result.L1Distance(B); - LIScore = result.LInfDistance(B); flit::info_stream << "Skew symmetric cross product rotation failed with "; flit::info_stream << "L1Distance " << L1Score << std::endl; flit::info_stream << "starting vectors: " << std::endl; @@ -85,9 +81,8 @@ class DoSkewSymCPRotationTest: public flit::TestBase { flit::info_stream << B << std::endl; flit::info_stream << "ended up with: " << std::endl; flit::info_stream << "L1Distance: " << L1Score << std::endl; - flit::info_stream << "LIDistance: " << LIScore << std::endl; } - return {std::pair(L1Score, LIScore), 0}; + return L1Score; } private: diff --git a/litmus-tests/tests/FMACancel.cpp b/litmus-tests/tests/FMACancel.cpp index afc48363..72b95804 100644 --- 
a/litmus-tests/tests/FMACancel.cpp +++ b/litmus-tests/tests/FMACancel.cpp @@ -9,27 +9,24 @@ class FMACancel : public flit::TestBase { public: FMACancel(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 2; } + virtual size_t getInputsPerRun() override { return 2; } - flit::TestInput getDefaultInput() { + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { .1, 1.1e5 }; return ti; } protected: - virtual flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { const T a = ti.vals[0]; const T b = ti.vals[1]; const T c = a; const T d = -b; const T score = a*b + c*d; - const T rtemp = c*d; - const T score2 = a*b + rtemp; flit::info_stream << id << ": score = " << score << std::endl; - flit::info_stream << id << ": score2 = " << score2 << std::endl; - return {std::pair(score, score2), 0}; + return score; } protected: diff --git a/litmus-tests/tests/InliningProblem.cpp b/litmus-tests/tests/InliningProblem.cpp index e8644eea..77d582f3 100644 --- a/litmus-tests/tests/InliningProblem.cpp +++ b/litmus-tests/tests/InliningProblem.cpp @@ -9,9 +9,9 @@ class InliningProblem : public flit::TestBase { public: InliningProblem(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 1; } + virtual size_t getInputsPerRun() override { return 1; } - flit::TestInput getDefaultInput() { + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { .1, 1.1e3, -.1, -1.1e3, 1/3 }; return ti; @@ -23,16 +23,15 @@ class InliningProblem : public flit::TestBase { const T x_again = -nx; return x_again; } - virtual flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { T a = ti.vals[0]; T also_a = identity(a); const T score = std::sqrt(a) * std::sqrt(also_a); - const T score2 = std::pow(std::sqrt(a), 2); flit::info_stream << id << ": score = " << score << std::endl; - flit::info_stream << id << ": score2 = " << score2 << std::endl; - return {std::pair(score, score2), 0}; + + return score; } protected: diff --git a/litmus-tests/tests/KahanSum.cpp b/litmus-tests/tests/KahanSum.cpp index 1afc3c31..3c3ecbbc 100644 --- a/litmus-tests/tests/KahanSum.cpp +++ b/litmus-tests/tests/KahanSum.cpp @@ -15,11 +15,11 @@ class KahanSum : public flit::TestBase { public: KahanSum(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 10000; } - virtual flit::TestInput getDefaultInput(); + virtual size_t getInputsPerRun() override { return 10000; } + virtual flit::TestInput getDefaultInput() override; protected: - virtual flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { Kahan kahan; Shewchuk chuk; T naive = 0.0; @@ -28,13 +28,15 @@ class KahanSum : public flit::TestBase { kahan.add(val); naive += val; } + T kahan_sum = kahan.sum(); + T shewchuk_sum = chuk.sum(); flit::info_stream << id << ": pi = " << static_cast(PI) << std::endl; flit::info_stream << id << ": exp(1) = " << static_cast(EXP) << std::endl; flit::info_stream << id << ": naive sum = " << naive << std::endl; - flit::info_stream << id << ": kahan sum = " << kahan.sum() << std::endl; - flit::info_stream << id << ": shewchuk sum = " << kahan.sum() << std::endl; + flit::info_stream << id << ": kahan sum = " << kahan_sum << std::endl; + flit::info_stream << 
id << ": shewchuk sum = " << shewchuk_sum << std::endl; flit::info_stream << id << ": Epsilon = " << std::numeric_limits::epsilon() << std::endl; - return {std::pair(kahan.sum(), naive), 0}; + return kahan_sum; } protected: diff --git a/litmus-tests/tests/Paranoia.cpp b/litmus-tests/tests/Paranoia.cpp index 946f6540..3b5c2b9e 100644 --- a/litmus-tests/tests/Paranoia.cpp +++ b/litmus-tests/tests/Paranoia.cpp @@ -208,11 +208,11 @@ class Paranoia : public flit::TestBase { public: Paranoia(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 0; } - virtual flit::TestInput getDefaultInput() { return {}; } + virtual size_t getInputsPerRun() override { return 0; } + virtual flit::TestInput getDefaultInput() override { return {}; } protected: - virtual flit::ResultType::mapped_type run_impl(const flit::TestInput& ti); + virtual flit::Variant run_impl(const flit::TestInput& ti) override; void setTimeout(long millis); // starts the timer for checkTimeout() void checkTimeout(); // throws TimeoutError if timer from setTimeout has expired @@ -320,7 +320,7 @@ void sigfpe(int i) } template -flit::ResultType::mapped_type Paranoia::run_impl(const flit::TestInput& ti) +flit::Variant Paranoia::run_impl(const flit::TestInput& ti) { FLIT_UNUSED(ti); int timeoutMillis = 1000; @@ -1867,9 +1867,7 @@ flit::ResultType::mapped_type Paranoia::run_impl(const flit::TestInput& ti status = ExitStatus::OverflowStatus; } - return {std::pair(Milestone, - static_cast(status)), - 0}; + return Milestone; } /* setTimeout */ diff --git a/litmus-tests/tests/ReciprocalMath.cpp b/litmus-tests/tests/ReciprocalMath.cpp index 635b5645..3687dd44 100644 --- a/litmus-tests/tests/ReciprocalMath.cpp +++ b/litmus-tests/tests/ReciprocalMath.cpp @@ -9,16 +9,16 @@ class ReciprocalMath : public flit::TestBase { public: ReciprocalMath(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 5; } + virtual size_t getInputsPerRun() override { return 5; } - flit::TestInput getDefaultInput() { + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { .1, 1.1e3, -.1, -1.1e3, 1/3 }; return ti; } protected: - virtual flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { T a = ti.vals[0]; T b = ti.vals[1]; T c = ti.vals[2]; @@ -30,12 +30,11 @@ class ReciprocalMath : public flit::TestBase { c = c/m; d = d/m; - const T score = a + c; - const T score2 = b + d; + const T score = a + b + c + d; flit::info_stream << id << ": score = " << score << std::endl; - flit::info_stream << id << ": score2 = " << score2 << std::endl; - return {std::pair(score, score2), 0}; + + return score; } protected: diff --git a/litmus-tests/tests/RotateAndUnrotate.cpp b/litmus-tests/tests/RotateAndUnrotate.cpp index 80916b07..86987324 100644 --- a/litmus-tests/tests/RotateAndUnrotate.cpp +++ b/litmus-tests/tests/RotateAndUnrotate.cpp @@ -7,7 +7,7 @@ template GLOBAL void -RaUKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +RaUKern(const flit::CuTestInput* tiList, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -19,8 +19,7 @@ RaUKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ auto orig = A; A = A.rotateAboutZ_3d(theta); A = A.rotateAboutZ_3d(-theta); - results[idx].s1 = A.L1Distance(orig); - results[idx].s2 = A.LInfDistance(orig); + results[idx] = A.L1Distance(orig); } template @@ -28,8 +27,8 @@ 
class RotateAndUnrotate: public flit::TestBase { public: RotateAndUnrotate(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 3; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 3; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.min = -6; ti.max = 6; @@ -38,9 +37,9 @@ class RotateAndUnrotate: public flit::TestBase { } protected: - virtual flit::KernelFunction* getKernel() { return RaUKern; } - virtual - flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::KernelFunction* getKernel() override { return RaUKern; } + + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto theta = M_PI; auto A = flit::Vector(ti.vals); auto orig = A; @@ -58,7 +57,7 @@ class RotateAndUnrotate: public flit::TestBase { } flit::info_stream << "in " << id << std::endl; A.dumpDistanceMetrics(orig, flit::info_stream); - return {std::pair(dist, A.LInfDistance(orig)), 0}; + return dist; } protected: diff --git a/litmus-tests/tests/RotateFullCircle.cpp b/litmus-tests/tests/RotateFullCircle.cpp index a5abd6ce..4361d063 100644 --- a/litmus-tests/tests/RotateFullCircle.cpp +++ b/litmus-tests/tests/RotateFullCircle.cpp @@ -7,7 +7,7 @@ template GLOBAL void -RFCKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +RFCKern(const flit::CuTestInput* tiList, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -21,8 +21,7 @@ RFCKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ for(decltype(n) r = 0; r < n; ++r){ A = A.rotateAboutZ_3d(theta); } - results[idx].s1 = A.L1Distance(orig); - results[idx].s2 = A.LInfDistance(orig); + results[idx] = A.L1Distance(orig); } template @@ -30,8 +29,8 @@ class RotateFullCircle: public flit::TestBase { public: RotateFullCircle(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 3; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 3; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.min = -6; ti.max = 6; @@ -43,8 +42,9 @@ class RotateFullCircle: public flit::TestBase { } protected: - virtual flit::KernelFunction* getKernel() {return RFCKern; } - flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::KernelFunction* getKernel() override {return RFCKern; } + + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto n = ti.iters; flit::Vector A = flit::Vector(ti.vals); auto orig = A; @@ -62,8 +62,7 @@ class RotateFullCircle: public flit::TestBase { } flit::info_stream << "in " << id << std::endl; A.dumpDistanceMetrics(orig, flit::info_stream); - return {std::pair(A.L1Distance(orig), - A.LInfDistance(orig)), 0}; + return A.L1Distance(orig); } private: diff --git a/litmus-tests/tests/ShewchukSum.cpp b/litmus-tests/tests/ShewchukSum.cpp index 31a86b69..009fce60 100644 --- a/litmus-tests/tests/ShewchukSum.cpp +++ b/litmus-tests/tests/ShewchukSum.cpp @@ -11,22 +11,16 @@ class ShewchukSum : public flit::TestBase { public: ShewchukSum(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 1000; } - virtual flit::TestInput getDefaultInput(); + virtual size_t getInputsPerRun() override { return 1000; } + virtual flit::TestInput getDefaultInput() override; protected: - virtual flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + 
virtual flit::Variant run_impl(const flit::TestInput& ti) override { Shewchuk chuk; T naive = 0.0; for (auto val : ti.vals) { chuk.add(val); naive += val; - //flit::info_stream - // << std::setw(7) - // << std::setprecision(7) - // << id << ": + " << val - // << " = " << chuk.sum() << " or " << naive - // << std::endl; flit::info_stream << id << ": partials now: (" << chuk.partials().size() << ") "; for (auto p : chuk.partials()) { @@ -35,10 +29,10 @@ class ShewchukSum : public flit::TestBase { flit::info_stream << std::endl; } T sum = chuk.sum(); - flit::info_stream << id << ": naive sum = " << naive << std::endl; - flit::info_stream << id << ": shewchuk sum = " << sum << std::endl; + flit::info_stream << id << ": naive sum = " << naive << std::endl; + flit::info_stream << id << ": shewchuk sum = " << sum << std::endl; flit::info_stream << id << ": shewchuk partials = " << chuk.partials().size() << std::endl; - return {std::pair(sum, chuk.sum2()), 0}; + return sum; } protected: diff --git a/litmus-tests/tests/SinInt.cpp b/litmus-tests/tests/SinInt.cpp index ba18efee..f12a9acd 100644 --- a/litmus-tests/tests/SinInt.cpp +++ b/litmus-tests/tests/SinInt.cpp @@ -11,9 +11,9 @@ class SinInt : public flit::TestBase { public: SinInt(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 1; } + virtual size_t getInputsPerRun() override { return 1; } - flit::TestInput getDefaultInput() { + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; const T pi = 3.141592653589793238462643383279502884197169399375105820974944592307816406286208998L; ti.vals = { pi }; @@ -21,14 +21,13 @@ class SinInt : public flit::TestBase { } protected: - virtual flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { const int zero = (rand() % 10) / 99; const T val = ti.vals[0]; const T score = std::sin(val + zero) / std::sin(val); - const T score2 = score - 1.0; - flit::info_stream << id << ": score = " << score << std::endl; - flit::info_stream << id << ": score2 = " << score2 << std::endl; - return {std::pair(score, score2), 0}; + flit::info_stream << id << ": score = " << score << std::endl; + flit::info_stream << id << ": score - 1.0 = " << score - 1.0 << std::endl; + return score; } protected: diff --git a/litmus-tests/tests/TrianglePHeron.cpp b/litmus-tests/tests/TrianglePHeron.cpp index 61512c90..a0246ec5 100644 --- a/litmus-tests/tests/TrianglePHeron.cpp +++ b/litmus-tests/tests/TrianglePHeron.cpp @@ -24,7 +24,7 @@ T getArea(const T a, template GLOBAL void -TrianglePHKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results) { +TrianglePHKern(const flit::CuTestInput* tiList, double* results) { #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -48,8 +48,7 @@ TrianglePHKern(const flit::CuTestInput* tiList, flit::CudaResultElement* resu auto crit = getCArea(a,b,c); score += std::abs(crit - checkVal); } - results[idx].s1 = score; - results[idx].s2 = 0.0; + results[idx] = score; } template @@ -57,8 +56,8 @@ class TrianglePHeron: public flit::TestBase { public: TrianglePHeron(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 1; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 1; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.iters = 200; ti.vals = { 6.0 }; @@ -66,10 +65,9 @@ class TrianglePHeron: public 
flit::TestBase { } protected: - virtual - flit::KernelFunction* getKernel() {return TrianglePHKern; } - virtual - flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::KernelFunction* getKernel() override {return TrianglePHKern; } + + virtual flit::Variant run_impl(const flit::TestInput& ti) override { T maxval = ti.vals[0]; // start as a right triangle T a = maxval; @@ -77,7 +75,6 @@ class TrianglePHeron: public flit::TestBase { T c = maxval * std::sqrt(2); const T delta = maxval / (T)ti.iters; - // 1/2 b*h = A // all perturbations will have the same base and height (plus some FP noise) const T checkVal = 0.5 * b * a; @@ -92,7 +89,7 @@ class TrianglePHeron: public flit::TestBase { auto crit = getArea(a,b,c); score += std::abs(crit - checkVal); } - return {std::pair(score, 0.0), 0}; + return score; } protected: diff --git a/litmus-tests/tests/TrianglePSylv.cpp b/litmus-tests/tests/TrianglePSylv.cpp index 7e8bef04..40fb51a7 100644 --- a/litmus-tests/tests/TrianglePSylv.cpp +++ b/litmus-tests/tests/TrianglePSylv.cpp @@ -22,7 +22,7 @@ T getArea(const T a, template GLOBAL void -TrianglePSKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +TrianglePSKern(const flit::CuTestInput* tiList, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -46,8 +46,7 @@ TrianglePSKern(const flit::CuTestInput* tiList, flit::CudaResultElement* resu auto crit = getCArea(a,b,c); score += std::abs(crit - checkVal); } - results[idx].s1 = score; - results[idx].s2 = 0.0; + results[idx] = score; } template @@ -55,8 +54,8 @@ class TrianglePSylv: public flit::TestBase { public: TrianglePSylv(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 1; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 1; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.iters = 200; ti.vals = { 6.0 }; @@ -64,10 +63,9 @@ class TrianglePSylv: public flit::TestBase { } protected: - virtual - flit::KernelFunction* getKernel() {return TrianglePSKern; } - virtual - flit::ResultType::mapped_type run_impl(const flit::TestInput& ti) { + virtual flit::KernelFunction* getKernel() override {return TrianglePSKern; } + + virtual flit::Variant run_impl(const flit::TestInput& ti) override { T maxval = ti.vals[0]; // start as a right triangle T a = maxval; @@ -90,7 +88,7 @@ class TrianglePSylv: public flit::TestBase { auto crit = getArea(a,b,c); score += std::abs(crit - checkVal); } - return {std::pair(score, 0.0), 0}; + return score; } protected: diff --git a/litmus-tests/tests/langois.cpp b/litmus-tests/tests/langois.cpp index eea104df..54bd748c 100644 --- a/litmus-tests/tests/langois.cpp +++ b/litmus-tests/tests/langois.cpp @@ -14,7 +14,7 @@ // template // GLOBAL // void -// addNameHere(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// addNameHere(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else @@ -38,8 +38,7 @@ // auto crit = getCArea(a,b,c); // score += std::abs(crit - checkVal); // } -// results[idx].s1 = score; -// results[idx].s2 = 0.0; +// results[idx] = score; // } //these are the helpers for the langois compensating algos @@ -80,14 +79,13 @@ class langDotFMA: public flit::TestBase { public: langDotFMA(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 0; } - virtual flit::TestInput 
getDefaultInput() { return {}; } + virtual size_t getInputsPerRun() override { return 0; } + virtual flit::TestInput getDefaultInput() override { return {}; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { FLIT_UNUSED(ti); using stype = typename std::vector::size_type; stype size = 16; @@ -101,7 +99,7 @@ class langDotFMA: public flit::TestBase { for(stype i = 1; i < size; ++i){ s[i] = std::fma(x[i], y[i], s[i-1]); } - return {std::pair(s[size-1], (T)0.0), 0}; + return s[size-1]; } protected: @@ -116,14 +114,13 @@ class langCompDotFMA: public flit::TestBase { public: langCompDotFMA(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 0; } - virtual flit::TestInput getDefaultInput() { return {}; } + virtual size_t getInputsPerRun() override { return 0; } + virtual flit::TestInput getDefaultInput() override { return {}; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { FLIT_UNUSED(ti); using stype = typename std::vector::size_type; stype size = 16; @@ -140,7 +137,7 @@ class langCompDotFMA: public flit::TestBase { ThreeFMA(x[i], y[i], s[i-1], s[i], a, B); c[i] = c[i-1] + (a + B); } - return {std::pair(s[size-1] + c[size-1], (T)0.0), 0}; + return s[size-1] + c[size-1]; } protected: @@ -155,14 +152,13 @@ class langCompDot: public flit::TestBase { public: langCompDot(std::string id) : flit::TestBase(std::move(id)) {} - virtual size_t getInputsPerRun() { return 0; } - virtual flit::TestInput getDefaultInput() { return {}; } + virtual size_t getInputsPerRun() override { return 0; } + virtual flit::TestInput getDefaultInput() override { return {}; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { FLIT_UNUSED(ti); using stype = typename std::vector::size_type; stype size = 16; @@ -180,7 +176,7 @@ class langCompDot: public flit::TestBase { TwoSum(p, s[i-1], s[i], si); c[i] = c[i-1] + (pi + si); } - return {std::pair(s[size-1] + c[size-1], (T)0.0), 0}; + return s[size-1] + c[size-1]; } protected: diff --git a/litmus-tests/tests/tinys.cpp b/litmus-tests/tests/tinys.cpp index fa482a3b..86432060 100644 --- a/litmus-tests/tests/tinys.cpp +++ b/litmus-tests/tests/tinys.cpp @@ -10,14 +10,13 @@ // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -25,18 +24,17 @@ class FtoDecToF: public flit::TestBase { public: FtoDecToF(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 1; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override 
{ return 1; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = {std::nextafter(T(0.0), T(1.0))}; return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { std::numeric_limits nlim; // from https://en.wikipedia.org/wiki/IEEE_floating_point uint16_t ddigs = nlim.digits * std::log10(2) + 1; @@ -46,7 +44,7 @@ class FtoDecToF: public flit::TestBase { dstr = res.str(); T backAgain; std::istringstream(dstr) >> backAgain; - return{std::pair(std::fabs((long double)ti.vals[0] - backAgain), 0.0), 0}; + return ti.vals[0] - backAgain; } using flit::TestBase::id; @@ -57,14 +55,13 @@ REGISTER_TYPE(FtoDecToF) // template // GLOBAL // void -// subnormalKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// subnormalKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -72,20 +69,17 @@ class subnormal: public flit::TestBase { public: subnormal(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 1; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 1; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = {std::nextafter(T(0.0), T(1.0))}; return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { - return { - std::pair(ti.vals[0] - ti.vals[0] / 2, 0.0), 0 - }; + virtual flit::Variant run_impl(const flit::TestInput& ti) override { + return ti.vals[0] - ti.vals[0] / 2; } using flit::TestBase::id; }; @@ -95,14 +89,13 @@ REGISTER_TYPE(subnormal) // template // GLOBAL // void -// dotProdKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// dotProdKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -110,14 +103,13 @@ class dotProd: public flit::TestBase { public: dotProd(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 0; } - virtual flit::TestInput getDefaultInput() { return {}; } + virtual size_t getInputsPerRun() override { return 0; } + virtual flit::TestInput getDefaultInput() override { return {}; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { FLIT_UNUSED(ti); auto size = 16; @@ -127,8 +119,10 @@ class dotProd: public flit::TestBase { rand.begin() + size)); flit::Vector B(std::vector(rand.begin() + size, rand.begin() + 2*size)); - return {std::pair(A ^ B, 0.0), 0}; + return A ^ B; } + +protected: using flit::TestBase::id; }; REGISTER_TYPE(dotProd) @@ -136,14 +130,13 @@ REGISTER_TYPE(dotProd) // 
template // GLOBAL // void -// simpleReductionKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// simpleReductionKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -151,14 +144,13 @@ class simpleReduction: public flit::TestBase { public: simpleReduction(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 0; } - virtual flit::TestInput getDefaultInput() { return {}; } + virtual size_t getInputsPerRun() override { return 0; } + virtual flit::TestInput getDefaultInput() override { return {}; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { FLIT_UNUSED(ti); auto vals = flit::getRandSeq(); auto sublen = vals.size() / 4 - 1; @@ -172,7 +164,7 @@ class simpleReduction: public flit::TestBase { for(uint32_t i = sublen; i < vals.size(); ++i){ sum += vals[i]; } - return {std::pair((long double) sum, 0.0), 0}; + return sum; } using flit::TestBase::id; }; @@ -183,14 +175,13 @@ REGISTER_TYPE(simpleReduction) // template // GLOBAL // void -// addTOLKernel(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// addTOLKernel(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -198,8 +189,8 @@ class addTOL : public flit::TestBase { public: addTOL(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 3; } - virtual flit::TestInput getDefaultInput(){ + virtual size_t getInputsPerRun() override { return 3; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; std::numeric_limits nls; auto man_bits = nls.digits; @@ -229,13 +220,13 @@ class addTOL : public flit::TestBase { } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = ti.vals[0] + ti.vals[1] + ti.vals[2]; - return {std::pair(res, 0.0), 0}; + return res; } + using flit::TestBase::id; }; @@ -246,14 +237,13 @@ REGISTER_TYPE(addTOL) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -261,22 +251,21 @@ class addSub: public flit::TestBase { public: addSub(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 1; } - virtual flit::TestInput getDefaultInput(){ + virtual size_t getInputsPerRun() override { return 1; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = {T(1.0)}; return ti; } protected: - virtual flit::KernelFunction* 
getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { std::numeric_limits nls; auto man_bits = nls.digits; auto big = std::pow(2, (T)man_bits - 1); auto res = (ti.vals[0] + big) - big; - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -285,14 +274,13 @@ REGISTER_TYPE(addSub) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -300,8 +288,8 @@ class divc: public flit::TestBase { public: divc(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 2; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 2; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0], @@ -311,12 +299,11 @@ class divc: public flit::TestBase { } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = ti.vals[0] / ti.vals[1]; - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -324,14 +311,13 @@ REGISTER_TYPE(divc) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -339,20 +325,19 @@ class zeroMinusX: public flit::TestBase { public: zeroMinusX(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 1; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 1; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0] }; return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = T(0.0) - ti.vals[0]; - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -361,14 +346,13 @@ REGISTER_TYPE(zeroMinusX) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -376,20 +360,19 @@ class xMinusZero: public flit::TestBase { public: xMinusZero(std::string id) : flit::TestBase(std::move(id)){} - 
virtual size_t getInputsPerRun() { return 1; } - virtual flit::TestInput getDefaultInput(){ + virtual size_t getInputsPerRun() override { return 1; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0] }; return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = ti.vals[0] - (T)0.0; - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -398,14 +381,13 @@ REGISTER_TYPE(xMinusZero) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -413,19 +395,18 @@ class zeroDivX: public flit::TestBase { public: zeroDivX(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 1; } - virtual flit::TestInput getDefaultInput(){ + virtual size_t getInputsPerRun() override { return 1; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0] }; return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = (T)0.0 / ti.vals[0]; - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -434,14 +415,13 @@ REGISTER_TYPE(zeroDivX) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -449,19 +429,18 @@ class xDivOne: public flit::TestBase { public: xDivOne(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 1; } - virtual flit::TestInput getDefaultInput(){ + virtual size_t getInputsPerRun() override { return 1; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0] }; return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = ti.vals[0] / (T)1.0; - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -470,14 +449,13 @@ REGISTER_TYPE(xDivOne) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = 
; +// results[idx] = 0.0; // } template @@ -485,19 +463,18 @@ class xDivNegOne: public flit::TestBase { public: xDivNegOne(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 1; } - virtual flit::TestInput getDefaultInput(){ + virtual size_t getInputsPerRun() override { return 1; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0] }; return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = ti.vals[0] / (T)-1.0; - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -506,14 +483,13 @@ REGISTER_TYPE(xDivNegOne) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -521,8 +497,8 @@ class negAdivB: public flit::TestBase { public: negAdivB(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 2; } - virtual flit::TestInput getDefaultInput(){ + virtual size_t getInputsPerRun() override { return 2; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0], @@ -531,12 +507,11 @@ class negAdivB: public flit::TestBase { return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = -(ti.vals[0] / ti.vals[1]); - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -545,14 +520,13 @@ REGISTER_TYPE(negAdivB) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } // template @@ -569,11 +543,10 @@ REGISTER_TYPE(negAdivB) // protected: // virtual flit::KernelFunction* getKernel() { return nullptr; } // -// virtual flit::ResultType::mapped_type -// run_impl(const flit::TestInput& ti) { +// virtual flit::Variant run_impl(const flit::TestInput& ti) { // //yes, this is ugly. 
ti.vals s/b vector of floats // auto res = (T)((std::result_of<::get_next_type(T)>::type)ti.vals[0]); -// return {res, 0.0}; +// return res; // } // using flit::TestBase::id; // }; @@ -583,14 +556,13 @@ REGISTER_TYPE(negAdivB) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -598,8 +570,8 @@ class negAminB: public flit::TestBase { public: negAminB(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 2; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 2; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0], @@ -608,12 +580,11 @@ class negAminB: public flit::TestBase { return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = -(ti.vals[0] - ti.vals[1]); - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -623,14 +594,13 @@ REGISTER_TYPE(negAminB) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -638,20 +608,19 @@ class xMinusX: public flit::TestBase { public: xMinusX(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 1; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 1; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0] }; return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = ti.vals[0] - ti.vals[0]; - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -661,14 +630,13 @@ REGISTER_TYPE(xMinusX) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -676,8 +644,8 @@ class negAplusB: public flit::TestBase { public: negAplusB(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 2; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 2; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0], @@ 
-686,12 +654,11 @@ class negAplusB: public flit::TestBase { return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = -(ti.vals[0] + ti.vals[1]); - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -701,14 +668,13 @@ REGISTER_TYPE(negAplusB) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -716,8 +682,8 @@ class aXbDivC: public flit::TestBase { public: aXbDivC(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 3; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 3; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0], @@ -727,12 +693,11 @@ class aXbDivC: public flit::TestBase { return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = ti.vals[0] * (ti.vals[1] / ti.vals[2]); - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -742,14 +707,13 @@ REGISTER_TYPE(aXbDivC) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -757,8 +721,8 @@ class aXbXc: public flit::TestBase { public: aXbXc(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 3; } - virtual flit::TestInput getDefaultInput(){ + virtual size_t getInputsPerRun() override { return 3; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0], @@ -768,12 +732,11 @@ class aXbXc: public flit::TestBase { return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = ti.vals[0] * (ti.vals[1] * ti.vals[2]); - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -783,14 +746,13 @@ REGISTER_TYPE(aXbXc) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -798,8 
+760,8 @@ class aPbPc: public flit::TestBase { public: aPbPc(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 3; } - virtual flit::TestInput getDefaultInput() { + virtual size_t getInputsPerRun() override { return 3; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0], @@ -809,12 +771,11 @@ class aPbPc: public flit::TestBase { return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const flit::TestInput& ti) override { auto res = ti.vals[0] + (ti.vals[1] + ti.vals[2]); - return {std::pair(res, 0.0), 0}; + return res; } using flit::TestBase::id; }; @@ -824,14 +785,13 @@ REGISTER_TYPE(aPbPc) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -839,8 +799,8 @@ class xPc1EqC2: public flit::TestBase { public: xPc1EqC2(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 3; } - virtual flit::TestInput getDefaultInput(){ + virtual size_t getInputsPerRun() override { return 3; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0], @@ -850,12 +810,11 @@ class xPc1EqC2: public flit::TestBase { return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { - auto res = ti.vals[0] + ti.vals[1] == ti.vals[2]; - return {std::pair(res?1.0:0.0, 0.0), 0}; + virtual flit::Variant run_impl(const flit::TestInput& ti) override { + bool res = ti.vals[0] + ti.vals[1] == ti.vals[2]; + return res ? 
1.0 : 0.0; } using flit::TestBase::id; }; @@ -865,14 +824,13 @@ REGISTER_TYPE(xPc1EqC2) // template // GLOBAL // void -// FtoDecToFKern(const flit::CuTestInput* tiList, flit::CudaResultElement* results){ +// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ // #ifdef __CUDA__ // auto idx = blockIdx.x * blockDim.x + threadIdx.x; // #else // auto idx = 0; // #endif -// results[idx].s1 = ; -// results[idx].s2 = ; +// results[idx] = 0.0; // } template @@ -880,8 +838,8 @@ class xPc1NeqC2: public flit::TestBase { public: xPc1NeqC2(std::string id) : flit::TestBase(std::move(id)){} - virtual size_t getInputsPerRun() { return 3; } - virtual flit::TestInput getDefaultInput(){ + virtual size_t getInputsPerRun() override { return 3; } + virtual flit::TestInput getDefaultInput() override { flit::TestInput ti; ti.vals = { flit::getRandSeq()[0], @@ -891,12 +849,11 @@ class xPc1NeqC2: public flit::TestBase { return ti; } protected: - virtual flit::KernelFunction* getKernel() { return nullptr; } + virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::ResultType::mapped_type - run_impl(const flit::TestInput& ti) { - auto res = ti.vals[0] + ti.vals[1] != ti.vals[2]; - return {std::pair(res?1.0:0.0, 0.0), 0}; + virtual flit::Variant run_impl(const flit::TestInput& ti) override { + bool res = ti.vals[0] + ti.vals[1] != ti.vals[2]; + return res ? 1.0 : 0.0; } using flit::TestBase::id; }; diff --git a/scripts/flitcli/config/flit-default.toml.in b/scripts/flitcli/config/flit-default.toml.in index f933061a..ed5b5b4d 100644 --- a/scripts/flitcli/config/flit-default.toml.in +++ b/scripts/flitcli/config/flit-default.toml.in @@ -1,9 +1,12 @@ [database] -username = 'mbentley' -address = 'localhost' -type = 'postgres' -port = 5432 +# older versions of flit supported postgres. that has been removed. only +# sqlite is supported at the moment. +type = 'sqlite' + +# if relative path, it is relative to the directory containing this +# configuration file. +filepath = 'results.sqlite' [[hosts]] @@ -13,15 +16,23 @@ name = '{hostname}' flit_path = '{flit_path}' config_dir = '{config_dir}' +# The settings for "make dev" +[hosts.dev_build] +# compiler_name must be found in [[hosts.compilers]] list under name attribute +# but the optimization level and switches do not need to be in the compiler list +compiler_name = 'g++' +optimization_level = '-O2' +switches = '-funsafe-math-optimizations' + # The ground truth compilation to use in analysis [hosts.ground_truth] # compiler needs to match the name field for one of this host's hosts.compilers # The optimization level needs to be part of the list of optimization_levels # and the switch needs to be a string of compiler flags from the switches list. # TODO: make use of hosts.ground_truth -compiler = 'g++' +compiler_name = 'g++' optimization_level = '-O0' -switch = '' +switches = '' [[hosts.compilers]] @@ -48,7 +59,7 @@ switch = '' #'-Ofast', #'-O...' ? 
] - switches = [ + switches_list = [ '', '-fassociative-math', '-mavx', diff --git a/scripts/flitcli/flit_import.py b/scripts/flitcli/flit_import.py new file mode 100644 index 00000000..09f2613d --- /dev/null +++ b/scripts/flitcli/flit_import.py @@ -0,0 +1,159 @@ +'Implements the import subcommand, importing results into a database' + +import flitutil as util + +import toml + +import argparse +import csv +import datetime +import os +import sqlite3 +import sys + +brief_description = 'Import flit results into the configured database' + +def _file_check(filename): + if not os.path.isfile(filename): + raise argparse.ArgumentTypeError('File does not exist: {0}'.format(filename)) + return filename + +def main(arguments, prog=sys.argv[0]): + parser = argparse.ArgumentParser( + prog=prog, + description=''' + Import flit results into the configured database. The + configured database is found from the settings in + flit-config.toml. You can import either exported results or + results from manually running the tests. Note that importing + the same thing twice will result in having two copies of it + in the database. + ''', + ) + parser.add_argument('importfile', nargs='+', type=_file_check, + help=''' + File(s) to import into the database. These files + may be csv files or sqlite3 databases. + ''') + #parser.add_argument('-t', '--table', default='tests', + # help=''' + # The database table used for import (default is tests) + # ''') + parser.add_argument('-r', '--run', type=int, default=-1, + help=''' + The run number to import under. If the run does + not exist in the runs table, then a new entry will + be created with an autogenerated message. The + default behavior is to use the latest run in the + database (but one will be created if there is no + runs). If importing from an sqlite database, the + run column of the tests table is ignored since + there is no necessary correlation between run + numbers of that database and this database. So you + would still want to use the --run option. For the + sqlite database case, the results imported will + only be from the latest run in the importing + database. + ''') + parser.add_argument('--new-run', action='store_true', + help=''' + Specifies that this import should be under a new + run number that will be autogenerated. This option + conflicts with the --run option, meaning if this + argument is specified, then the --run argument will + be ignored. This option is also implied if the + destination database has no runs in it. + ''') + args = parser.parse_args(arguments) + + try: + projconf = toml.load('flit-config.toml') + except FileNotFoundError: + print('Error: {0} not found. 
Run "flit init"'.format(tomlfile), + file=sys.stderr) + return 1 + + assert projconf['database']['type'] == 'sqlite', \ + 'Only sqlite database supported' + db = util.sqlite_open(projconf['database']['filepath']) + + # if the database has no runs, then turn on --new-run + run_ids = [x['id'] for x in db.execute('select id from runs')] + if len(run_ids) == 0: + args.new_run = True + print('run_ids: ', run_ids) + + # Find the destination run + if not args.new_run: + assert args.run <= 0 or args.run in run_ids, \ + 'Specified run {0} is not in the runs table'.format(args.run) + if args.run not in run_ids: + args.run = sorted(run_ids)[-1] + else: # args.new_run + # Create a new run to use in import + db.execute('insert into runs(rdate,notes) values (?,?)', + (datetime.datetime.now(), 'Imported using flit import')) + db.commit() + args.run = db.execute('select id from runs order by id').fetchall()[-1]['id'] + + for importee in args.importfile: + print(importee) + if util.is_sqlite(importee): + # Try to treat the importfile like a sqlite database + import_db = util.sqlite_open(importee) + cur = import_db.cursor() + cur.execute('select id from runs') + run_ids = sorted([x['id'] for x in cur]) + if len(run_ids) == 0: + print(' nothing to import') + continue + latest_run = run_ids[-1] + cur.execute('select name,host,compiler,optl,switches,precision,' + 'comparison,comparison_d,file,nanosec ' + 'from tests where run = ?', (latest_run,)) + rows = cur.fetchall() + else: + with open(importee, 'r') as csvin: + reader = csv.DictReader(csvin) + rows = [row for row in reader] + if len(rows) == 0: + print(' nothing to import') + continue + to_insert = [] + for row in rows: + # Convert 'NULL' to None + for key, val in row.items(): + row[key] = val if val != 'NULL' else None + # Insert + to_insert.append(( + args.run, + row['name'], + row['host'], + row['compiler'], + row['optl'], + row['switches'], + row['precision'], + row['comparison'], + row['comparison_d'], + row['file'], + row['nanosec'], + )) + db.executemany(''' + insert into tests( + run, + name, + host, + compiler, + optl, + switches, + precision, + comparison, + comparison_d, + file, + nanosec) + values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ''', to_insert) + db.commit() + +if __name__ == '__main__': + sys.exit(main(sys.argv[1:])) diff --git a/scripts/flitcli/flit_init.py b/scripts/flitcli/flit_init.py index 48f390fd..d4549524 100644 --- a/scripts/flitcli/flit_init.py +++ b/scripts/flitcli/flit_init.py @@ -57,7 +57,7 @@ def main(arguments, prog=sys.argv[0]): # Add litmus tests too if args.litmus_tests: for srcfile in os.listdir(conf.litmus_test_dir): - if os.path.splitext(srcfile)[1] in ('.cpp', '.hpp', '.h'): + if os.path.splitext(srcfile)[1] in ('.cpp', '.h'): srcpath = os.path.join(conf.litmus_test_dir, srcfile) to_copy[os.path.join('tests', srcfile)] = srcpath diff --git a/scripts/flitcli/flit_run.py b/scripts/flitcli/flit_run.py index 4ec2d5dd..9f4a8d9a 100644 --- a/scripts/flitcli/flit_run.py +++ b/scripts/flitcli/flit_run.py @@ -1,4 +1,4 @@ -'Implements the squelch subcommand' +'Implements the run subcommand for executing all compilations of flit tests' import argparse import sys @@ -15,8 +15,8 @@ def main(arguments, prog=sys.argv[0]): should be sent to the database computer for later analysis. 
''', ) - parser.add_argument('directory', default='.', - help='The directory to initialize') + parser.add_argument('description', + help='A description of the test run (required)') args = parser.parse_args(arguments) # Subcommand logic here diff --git a/scripts/flitcli/flit_update.py b/scripts/flitcli/flit_update.py index d0ed8f0d..53e324b0 100644 --- a/scripts/flitcli/flit_update.py +++ b/scripts/flitcli/flit_update.py @@ -39,15 +39,46 @@ def main(arguments, prog=sys.argv[0]): else: print('Creating {0}'.format(makefile)) - compiler = projconf['hosts'][0]['compilers'][0]['binary'] - if '/' in compiler: - compiler = os.path.realpath(compiler) + host = projconf['hosts'][0] + dev_build = host['dev_build'] + dev_compiler_name = dev_build['compiler_name'] + dev_optl = dev_build['optimization_level'] + dev_switches = dev_build['switches'] + matching_dev_compilers = [x for x in host['compilers'] + if x['name'] == dev_compiler_name] + assert len(matching_dev_compilers) > 0, \ + 'Compiler name {0} not found'.format(dev_compiler_name) + assert len(matching_dev_compilers) < 2, \ + 'Multiple compilers with name {0} found'.format(dev_compiler_name) + dev_compiler_bin = matching_dev_compilers[0]['binary'] + if '/' in dev_compiler_bin: + dev_compiler_bin = os.path.realpath(dev_compiler_bin) + + ground_truth = host['ground_truth'] + gt_compiler_name = ground_truth['compiler_name'] + gt_optl = ground_truth['optimization_level'] + gt_switches = ground_truth['switches'] + matching_gt_compilers = [x for x in host['compilers'] + if x['name'] == gt_compiler_name] + assert len(matching_gt_compilers) > 0, \ + 'Compiler name {0} not found'.format(gt_compiler_name) + assert len(matching_gt_compilers) < 2, \ + 'Multiple compilers with name {0} found'.format(gt_compiler_name) + # TODO: use the compiler mnemonic rather than the path + gt_compiler_bin = matching_gt_compilers[0]['binary'] + if '/' in gt_compiler_bin: + gt_compiler_bin = os.path.realpath(gt_compiler_bin) flitutil.process_in_file( os.path.join(conf.data_dir, 'Makefile.in'), makefile, { - 'compiler': compiler, + 'dev_compiler': dev_compiler_bin, + 'dev_optl': dev_optl, + 'dev_switches': dev_switches, + 'ground_truth_compiler': gt_compiler_bin, + 'ground_truth_optl': gt_optl, + 'ground_truth_switches': gt_switches, 'flit_include_dir': conf.include_dir, 'flit_lib_dir': conf.lib_dir, 'flit_script': os.path.join(conf.script_dir, 'flit.py'), diff --git a/scripts/flitcli/flitconfig.py b/scripts/flitcli/flitconfig.py index 4106c1e4..7f8aa6ea 100644 --- a/scripts/flitcli/flitconfig.py +++ b/scripts/flitcli/flitconfig.py @@ -21,20 +21,20 @@ script_dir = os.path.dirname(os.path.realpath(__file__)) # flit documentation -doc_dir = os.path.realpath(os.path.join(script_dir, '../../documentation')) +doc_dir = os.path.realpath(os.path.join(script_dir, '..', '..', 'documentation')) # compiled libflit.so -lib_dir = os.path.realpath(os.path.join(script_dir, '../../lib')) +lib_dir = os.path.realpath(os.path.join(script_dir, '..', '..', 'lib')) # flit C++ include files, primarily flit.h -include_dir = os.path.realpath(os.path.join(script_dir, '../../src')) +include_dir = os.path.realpath(os.path.join(script_dir, '..', '..', 'src')) # default configuration for flit init config_dir = os.path.join(script_dir, 'config') # default data files such as Makefiles -data_dir = os.path.join(script_dir, 'data') +data_dir = os.path.join(script_dir, '..', '..', 'data') # directory containing litmus tests -litmus_test_dir = os.path.realpath(os.path.join(script_dir,
'../../litmus-tests/tests')) +litmus_test_dir = os.path.realpath(os.path.join(script_dir, '..', '..', 'litmus-tests', 'tests')) diff --git a/scripts/flitcli/flitutil.py b/scripts/flitcli/flitutil.py index 5e008219..32ac972d 100644 --- a/scripts/flitcli/flitutil.py +++ b/scripts/flitcli/flitutil.py @@ -2,7 +2,10 @@ Utility functions shared between multiple flit subcommands. ''' +import flitconfig as conf + import os +import sqlite3 import sys def process_in_file(infile, dest, vals, overwrite=False): @@ -25,4 +28,38 @@ def process_in_file(infile, dest, vals, overwrite=False): with open(dest, 'w') as fout: fout.write(fin.read().format(**vals)) +def sqlite_open(filepath): + ''' + Opens and returns an sqlite database cursor object. If the database does + not exist, it will be created. + ''' + # Using detect_types allows us to insert datetime objects + connection = sqlite3.connect(filepath, + detect_types=sqlite3.PARSE_DECLTYPES) + + # Use the dict factory so that queries return dictionary-like objects + connection.row_factory = sqlite3.Row + + # Create the tables if they do not exist. Also has other setup. + table_file = os.path.join(conf.data_dir, 'db', 'tables-sqlite.sql') + with open(table_file, 'r') as table_sql: + connection.executescript(table_sql.read()) + connection.commit() + + return connection + +def is_sqlite(filename): + 'Returns true if the file is likely an sqlite file.' + from os.path import isfile, getsize + + if not os.path.isfile(filename): + return False + + # SQLite database file header is 100 bytes + if os.path.getsize(filename) < 100: + return False + + with open(filename, 'rb') as fd: + header = fd.read(100) + return header[:16] == b'SQLite format 3\000' diff --git a/src/CUHelpers.cpp b/src/CUHelpers.cpp index b5c3d674..1b02de5b 100644 --- a/src/CUHelpers.cpp +++ b/src/CUHelpers.cpp @@ -1,5 +1,5 @@ -#include "CUHelpers.hpp" -#include "flitHelpers.hpp" +#include "CUHelpers.h" +#include "flitHelpers.h" namespace flit { diff --git a/src/CUHelpers.hpp b/src/CUHelpers.h similarity index 99% rename from src/CUHelpers.hpp rename to src/CUHelpers.h index 2e837977..febd37c5 100644 --- a/src/CUHelpers.hpp +++ b/src/CUHelpers.h @@ -14,8 +14,8 @@ #define DEVICE __device__ #define GLOBAL __global__ #endif -#include "flitHelpers.hpp" -#include "CUVector.hpp" +#include "flitHelpers.h" +#include "CUVector.h" #include diff --git a/src/CUVector.hpp b/src/CUVector.h similarity index 99% rename from src/CUVector.hpp rename to src/CUVector.h index e817d208..6dd99c7a 100644 --- a/src/CUVector.hpp +++ b/src/CUVector.h @@ -1,7 +1,7 @@ #ifndef CU_VECTOR_HPP #define CU_VECTOR_HPP -#include "CUHelpers.hpp" +#include "CUHelpers.h" #include diff --git a/src/InfoStream.cpp b/src/InfoStream.cpp index 8fbc4bc4..20eecf50 100644 --- a/src/InfoStream.cpp +++ b/src/InfoStream.cpp @@ -1,4 +1,4 @@ -#include "InfoStream.hpp" +#include "InfoStream.h" #include diff --git a/src/InfoStream.hpp b/src/InfoStream.h similarity index 100% rename from src/InfoStream.hpp rename to src/InfoStream.h diff --git a/src/TestBase.cpp b/src/TestBase.cpp index f2c6ca4c..9650b120 100644 --- a/src/TestBase.cpp +++ b/src/TestBase.cpp @@ -1,35 +1,21 @@ //this is the base instantiation for tests -#include "TestBase.hpp" +#include "TestBase.h" #include -// namespace { -// double volatile baseD; -// bool dReg = false; -// float volatile baseF; -// bool fReg = false; -// long double volatile baseL; -// bool lReg = false; -// bool watching = false; -// std::stack fStack; -// std::stack dStack; -// std::stack lStack; -// } // end of 
unnamed namespace namespace flit { - //output operator for ResultType -std::ostream& -operator<<(std::ostream& os, const ResultType& res){ - // std::string name = r.first; - // std::string prec; - // long double s1; - // long double s2; - // std::tie(prec, s1, s2) = r.second; - for(auto r : res){ - os << r.first.first << ":" << r.first.second << "," - << r.second.first.first << "," << r.second.first.second << r.second.second << std::endl; - } +std::ostream& operator<<(std::ostream& os, const TestResult& res) { + std::string comparison = + (res.is_comparison_null() ? std::to_string(res.comparison()) : "NULL"); + + os << res.name() << ":" << res.precision() << "," + << res.result() << "," + << comparison << "," + << res.nanosecs(); + return os; } + } // end of namespace flit diff --git a/src/TestBase.hpp b/src/TestBase.h similarity index 62% rename from src/TestBase.hpp rename to src/TestBase.h index 609bbfff..329ce070 100644 --- a/src/TestBase.hpp +++ b/src/TestBase.h @@ -5,30 +5,70 @@ #ifndef TEST_BASE_HPP #define TEST_BASE_HPP -#include "flitHelpers.hpp" +#include "flitHelpers.h" #ifdef __CUDA__ -#include "CUHelpers.hpp" +#include "CUHelpers.h" #endif // __CUDA__ +#include "Variant.h" + +#include #include #include +#include #include #include #include #include + #include namespace flit { void setWatching(bool watch = true); +struct TestResult { +public: + TestResult(const std::string &_name, const std::string &_precision, + const Variant &_result, int_fast64_t _nanosecs, + const std::string &_resultfile = "") + : m_name(_name) + , m_precision(_precision) + , m_result(_result) + , m_nanosecs(_nanosecs) + , m_resultfile(_resultfile) + { } + + // getters + std::string name() const { return m_name; } + std::string precision() const { return m_precision; } + Variant result() const { return m_result; } + int_fast64_t nanosecs() const { return m_nanosecs; } + long double comparison() const { return m_comparison; } + bool is_comparison_null() const { return m_is_comparison_null; } + std::string resultfile() const { return m_resultfile; } + + // setters + void set_comparison(long double _comparison) { + m_comparison = _comparison; + m_is_comparison_null = false; + } + void set_resultfile(const std::string &_resultfile) { + m_resultfile = _resultfile; + } -using ResultType = std::map, - std::pair, int_fast64_t>>; +private: + std::string m_name; + std::string m_precision; + Variant m_result; + int_fast64_t m_nanosecs {0}; + long double m_comparison {0.0L}; + bool m_is_comparison_null {true}; + std::string m_resultfile; +}; -std::ostream& -operator<<(std::ostream&, const ResultType&); +std::ostream& operator<<(std::ostream& os, const TestResult& res); template struct TestInput { @@ -40,11 +80,6 @@ struct TestInput { std::vector vals; }; -struct CudaResultElement { - double s1; - double s2; -}; - /** A simple structure used in CUDA tests. 
* * It stores some values and a pointer, but does not do dynamic allocation nor @@ -91,7 +126,7 @@ struct CuTestInput { * @param results: array where to store results, already allocated */ template -using KernelFunction = void (const CuTestInput*, CudaResultElement*); +using KernelFunction = void (const CuTestInput*, double*); template using CudaDeleter = void (T*); @@ -140,7 +175,7 @@ std::unique_ptr*> makeCudaArr(const T* vals, size_t length) { * @param stride: how many inputs per test run */ template -std::vector +std::vector runKernel(KernelFunction* kernel, const TestInput& ti, size_t stride) { #ifdef __CUDA__ size_t runCount; @@ -157,7 +192,7 @@ runKernel(KernelFunction* kernel, const TestInput& ti, size_t stride) { ctiList[i].vals = ti.vals.data() + i * stride; ctiList[i].length = stride; } - std::unique_ptr cuResults(new CudaResultElement[runCount]); + std::unique_ptr cuResults(new double[runCount]); // Note: __CPUKERNEL__ mode is broken by the change to run the kernel in // multithreaded mode. Its compilation is broken. // TODO: fix __CPUKERNEL__ mode for testing. @@ -170,25 +205,21 @@ runKernel(KernelFunction* kernel, const TestInput& ti, size_t stride) { ctiList[i].vals = deviceVals.get() + i * stride; } auto deviceInput = makeCudaArr(ctiList.get(), runCount); - auto deviceResult = makeCudaArr(nullptr, runCount); + auto deviceResult = makeCudaArr(nullptr, runCount); kernel<<>>(deviceInput.get(), deviceResult.get()); - auto resultSize = sizeof(CudaResultElement) * runCount; + auto resultSize = sizeof(double) * runCount; checkCudaErrors(cudaMemcpy(cuResults.get(), deviceResult.get(), resultSize, cudaMemcpyDeviceToHost)); # endif // __CPUKERNEL__ - std::vector results; - for (size_t i = 0; i < runCount; i++) { - results.emplace_back(std::pair - (cuResults[i].s1, cuResults[i].s2), 0); - } + std::vector results(cuResults, cuResults + runCount); return results; -#else // not __CUDA__ +#else // not __CUDA__ // Do nothing FLIT_UNUSED(kernel); FLIT_UNUSED(ti); FLIT_UNUSED(stride); return {}; -#endif // __CUDA__ +#endif // __CUDA__ } template @@ -208,13 +239,14 @@ class TestBase { * * @see getInputsPerRun */ - virtual ResultType run(const TestInput& ti, - const bool GetTime, - const size_t TimingLoops) { + virtual std::vector run(const TestInput& ti, + const std::string &filebase, + const bool GetTime, + const size_t TimingLoops) { using std::chrono::high_resolution_clock; using std::chrono::duration; using std::chrono::duration_cast; - ResultType results; + std::vector results; TestInput emptyInput { ti.iters, ti.highestDim, ti.ulp_inc, ti.min, ti.max, {} }; @@ -237,78 +269,145 @@ class TestBase { } // Run the tests - std::vector scoreList; + struct TimedResult { + Variant result; + int_fast64_t time; + std::string resultfile; + + TimedResult(Variant res, int_fast64_t t, const std::string &f = "") + : result(res), time(t), resultfile(f) { } + }; + std::vector resultValues; #ifdef __CUDA__ auto kernel = getKernel(); if (kernel == nullptr) { for (auto runInput : inputSequence) { + Variant testResult; + int_fast64_t timing = 0; if (GetTime) { - ResultType::mapped_type scores; int_fast64_t nsecs = 0; for (int r = 0; r < TimingLoops; ++r) { auto s = high_resolution_clock::now(); - scores = run_impl(runInput); + testResult = run_impl(runInput); auto e = high_resolution_clock::now(); nsecs += duration_cast>(e-s).count(); assert(nsecs > 0); } - scores.second = nsecs / TimingLoops; - scoreList.push_back(scores); + timing = nsecs / TimingLoops; } else { - scoreList.push_back(run_impl(runInput)); + 
testResult = run_impl(runInput); + timing = 0; } + // Output string results to file since it alone may take up to 300 MB + // or more + std::string outfile; + if (testResult.type() == Variant::Type::String) { + outfile = filebase + "_" + id + "_" + typeid(T).name() + ".dat"; + std::ofstream resultout(outfile); + resultout << testResult.string(); + testResult = Variant(); // empty the result to release memory + } + resultValues.emplace_back(testResult, timing, outfile); } } else { + int_fast64_t timing = 0; + std::vector scoreList; if (GetTime) { - ResultType::mapped_type scores; int_fast64_t nsecs = 0; for (size_t r = 0; r < TimingLoops; ++r){ auto s = high_resolution_clock::now(); + // TODO: find out how to properly profile CUDA kernels. + // FIXME: This strategy of timing is not right because: + // FIXME: 1. multiple inputs are tested in parallel + // FIXME: 2. timing is done not only over kernel execution, but also + // FIXME: in transfer time + // FIXME: 3. stalls in device availability are not accounted for scoreList = runKernel(kernel, ti, stride); auto e = high_resolution_clock::now(); nsecs += duration_cast>(e-s).count(); assert(nsecs > 0); } auto avg = nsecs / TimingLoops; - auto avgPerKernel = avg / scoreList.size(); - for (auto& s : scoreList) { - s.second = avgPerKernel; - } + timing = avg / scoreList.size(); } else { scoreList = runKernel(kernel, ti, stride); + timing = 0; + } + for (auto& testResult : scoreList) { + resultValues.emplace_back(testResult, timing); } } #else // not __CUDA__ for (auto runInput : inputSequence) { + Variant testResult; + int_fast64_t timing = 0; if (GetTime) { - ResultType::mapped_type scores; int_fast64_t nsecs = 0; for (size_t r = 0; r < TimingLoops; ++r) { auto s = high_resolution_clock::now(); - scores = run_impl(runInput); + testResult = run_impl(runInput); auto e = high_resolution_clock::now(); nsecs += duration_cast>(e-s).count(); assert(nsecs > 0); } - scores.second = nsecs / TimingLoops; - scoreList.push_back(scores); + timing = nsecs / TimingLoops; } else { - scoreList.push_back(run_impl(runInput)); + testResult = run_impl(runInput); + timing = 0; + } + // Output string results to file since it alone may take up to 300 MB + // or more + std::string outfile; + if (testResult.type() == Variant::Type::String) { + outfile = filebase + "_" + id + "_" + typeid(T).name() + ".dat"; + std::ofstream resultout(outfile); + resultout << testResult.string(); + testResult = Variant(); // empty the result to release memory } + resultValues.emplace_back(testResult, timing, outfile); } #endif // __CUDA__ // Store and return the test results - for (size_t i = 0; i < scoreList.size(); i++) { + for (size_t i = 0; i < resultValues.size(); i++) { std::string name = id; - if (scoreList.size() != 1) { + if (resultValues.size() != 1) { name += "_idx" + std::to_string(i); } - results.insert({{name, typeid(T).name()}, scoreList[i]}); + results.emplace_back(name, typeid(T).name(), resultValues[i].result, + resultValues[i].time, resultValues[i].resultfile); } return results; } + /** Simply forwards the request to the appropriate overload of compare. + * + * If the types of the variants do not match, then a std::runtime_error is + * thrown. 
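As a rough illustration of that contract (editor's sketch, not code from this patch; test is assumed to be a pointer to a concrete TestBase<double> instance and the values are made up):

flit::Variant gt(1.0L);
flit::Variant tr(1.5L);
long double err = test->variant_compare(gt, tr);  // dispatches to compare(long double, long double)

flit::Variant s("serialized output");
// test->variant_compare(gt, s);                  // type mismatch: throws std::runtime_error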
+ */ + long double variant_compare(const Variant &ground_truth, + const Variant &test_results) { + if (ground_truth.type() != test_results.type()) { + throw std::runtime_error("Variants to compare are of different types"); + } + long double val = 0.0; + switch (ground_truth.type()) { + case Variant::Type::LongDouble: + val = this->compare(ground_truth.longDouble(), + test_results.longDouble()); + break; + + case Variant::Type::String: + val = this->compare(ground_truth.string(), + test_results.string()); + break; + + default: + throw std::runtime_error("Unimplemented Variant type"); + } + return val; + } + /** This is a set of default inputs to use for the test * * This function should be implemented such that we can simply call this test @@ -330,6 +429,38 @@ class TestBase { */ virtual size_t getInputsPerRun() = 0; + /** Custom comparison methods + * + * These comparison operations are meant to create a metric between the test + * results from this test in the current compilation, and the results from + * the ground truth compilation. You can do things like the relative error + * or the absolute error (for the case of long double). + * + * The below specified functions are the default implementations defined in + * the base class. It is safe to delete these two functions if this + * implementation is adequate for you. + * + * Which one is used depends on the type of Variant that is returned from the + * run_impl function. The value returned by compare will be the value stored + * in the database for later analysis. + * + * Note: when using the CUDA kernel functionality, only long double return + * values are valid for now. + */ + virtual long double compare(long double ground_truth, + long double test_results) const { + // absolute error + return test_results - ground_truth; + } + + /** There is no good default implementation comparing two strings */ + virtual long double compare(const std::string &ground_truth, + const std::string &test_results) const { + FLIT_UNUSED(ground_truth); + FLIT_UNUSED(test_results); + return 0.0; + } + protected: /** If this test implements a CUDA kernel, return the kernel pointer * @@ -351,27 +482,29 @@ class TestBase { * test inputs required according to the implemented getInputsPerRun(). So * if that function returns 9, then the vector will have exactly 9 * elements. - * @return a single result. See ResultType to see what the mapped types is. + * @return a single result. You can return any type supported by flit::Variant. + * + * The returned value (whichever type is chosen) will be used by the public + * virtual compare() method. 
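For example, a test that prefers relative error over the default absolute error could override the long double overload of compare(); a sketch, assuming it sits inside a user's test class derived from flit::TestBase<T>:

virtual long double compare(long double ground_truth,
                            long double test_results) const override {
  if (ground_truth == 0.0L) {
    return test_results - ground_truth;                 // fall back to absolute error at zero
  }
  return (test_results - ground_truth) / ground_truth;  // relative error
}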
*/ - virtual ResultType::mapped_type run_impl(const TestInput& ti) = 0; + virtual Variant run_impl(const TestInput& ti) = 0; protected: const std::string id; }; -/// A completely empty test that outputs nothing +/** A completely empty test that outputs nothing */ template class NullTest : public TestBase { public: NullTest(std::string id) : TestBase(std::move(id)) {} - virtual TestInput getDefaultInput() { return {}; } - virtual size_t getInputsPerRun() { return 0; } - virtual ResultType run(const TestInput&, - const bool, - const size_t) { return {}; } + virtual TestInput getDefaultInput() override { return {}; } + virtual size_t getInputsPerRun() override { return 0; } + virtual std::vector run( + const TestInput&, const bool, const size_t) override { return {}; } protected: - virtual KernelFunction* getKernel() { return nullptr; } - virtual ResultType::mapped_type run_impl(const TestInput&) { return {}; } + virtual KernelFunction* getKernel() override { return nullptr; } + virtual Variant run_impl(const TestInput&) override { return {}; } }; class TestFactory { @@ -420,13 +553,13 @@ inline std::shared_ptr> TestFactory::get () { #ifdef __CUDA__ #define REGISTER_TYPE(klass) \ - class klass##Factory : public flit::TestFactory { \ + class klass##Factory : public flit::TestFactory { \ public: \ klass##Factory() { \ - flit::registerTest(#klass, this); \ + flit::registerTest(#klass, this); \ } \ protected: \ - virtual createType create() { \ + virtual createType create() override { \ return std::make_tuple( \ std::make_shared>(#klass), \ std::make_shared>(#klass), \ @@ -440,13 +573,13 @@ inline std::shared_ptr> TestFactory::get () { #else // not __CUDA__ #define REGISTER_TYPE(klass) \ - class klass##Factory : public flit::TestFactory { \ + class klass##Factory : public flit::TestFactory { \ public: \ klass##Factory() { \ - flit::registerTest(#klass, this); \ + flit::registerTest(#klass, this); \ } \ protected: \ - virtual createType create() { \ + virtual createType create() override { \ return std::make_tuple( \ std::make_shared>(#klass), \ std::make_shared>(#klass), \ @@ -463,16 +596,6 @@ inline std::map& getTests() { return tests; } -// template ::type* = nullptr> -// static std::map& getTests() { -// #ifdef __CUDA__ -// return {}; -// #else -// static std::map tests; -// return tests; -// #endif -// } - inline void registerTest(const std::string& name, TestFactory *factory) { getTests()[name] = factory; } diff --git a/src/Variant.cpp b/src/Variant.cpp new file mode 100644 index 00000000..7e5660af --- /dev/null +++ b/src/Variant.cpp @@ -0,0 +1,32 @@ +#include "Variant.h" + +namespace flit { + +std::ostream& operator<< (std::ostream& out, const Variant &val) { + switch (val.type()) { + case Variant::Type::None: + out << "Variant(None)"; + break; + case Variant::Type::LongDouble: + out << "Variant(" << val.longDouble() << ")"; + break; + case Variant::Type::String: + out << "Variant(\"" << val.string() << "\")"; + break; + default: + throw std::runtime_error("Unimplemented type"); + } + return out; +} + +template <> +long double Variant::val() const { + return this->longDouble(); +} + +template <> +std::string Variant::val() const { + return this->string(); +} + +} // end of namespace flit diff --git a/src/Variant.h b/src/Variant.h new file mode 100644 index 00000000..d4270cbb --- /dev/null +++ b/src/Variant.h @@ -0,0 +1,71 @@ +#ifndef VARIANT_H +#define VARIANT_H + +#include +#include +#include + +namespace flit { + +/** Can represent various different types + * + * This class is intented to 
be able to hold many different types in the + * same object so that you can do things like make a list containing + * sometimes strings and sometimes integers, etc. + */ +class Variant { +public: + enum class Type { + None = 1, + LongDouble = 2, + String = 3, + }; + + Variant() : _type(Type::None) { } + + Variant(long double val) + : _type(Type::LongDouble) + , _ld_val(val) { } + + Variant(std::string &val) + : _type(Type::String) + , _str_val(val) { } + Variant(const std::string &val) + : _type(Type::String) + , _str_val(val) { } + Variant(std::string &&val) + : _type(Type::String) + , _str_val(val) { } + Variant(const char* val) + : _type(Type::String) + , _str_val(val) { } + + Type type() const { return _type; } + + long double longDouble() const { + if (_type != Type::LongDouble) { + throw std::runtime_error("Variant is not of type Long Double"); + } + return _ld_val; + } + + std::string string() const { + if (_type != Type::String) { + throw std::runtime_error("Variant is not of type String"); + } + return _str_val; + } + + template T val() const; + +private: + Type _type; + long double _ld_val { 0.0l }; + std::string _str_val { "" }; +}; + +std::ostream& operator<< (std::ostream&, const Variant&); + +} // end of namespace flit + +#endif // VARIANT_H diff --git a/src/flit.cpp b/src/flit.cpp index 8d3a1c7f..c223d04c 100644 --- a/src/flit.cpp +++ b/src/flit.cpp @@ -8,30 +8,86 @@ #include #include +#include #include #include "flit.h" -#include "flitHelpers.hpp" -#include "TestBase.hpp" - -void outputResults(const flit::ResultType& scores, std::ostream& out){ - using flit::operator<<; - using flit::as_int; - for(const auto& i: scores){ - out - << "HOST,SWITCHES,OPTL,COMPILER," - << i.first.second << ",us," // sort - << i.second.first.first << "," // score0d - << as_int(i.second.first.first) << "," // score0 - << i.second.first.second << "," // score1d - << as_int(i.second.first.second) << "," // score1 - << i.first.first << "," // name - << i.second.second << "," // nanoseconds - << "FILENAME" // filename - << std::endl; +#include "flitHelpers.h" +#include "TestBase.h" + +namespace { + +/** Helper class for Csv. + * + * Represents a single row either indexed by number or by column name. 
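A small usage sketch of these helpers (illustrative only: Csv and CsvRow live in an unnamed namespace inside flit.cpp, the file name is made up, and the usual standard headers are assumed; the column names match those read by parseResults below):

std::ifstream in("results.csv");             // hypothetical results file with a header row
Csv csv(in);
CsvRow row;
while (csv >> row) {
  std::string name = row["name"];            // lookup by header column name
  long nanosec = std::stol(row["nanosec"]);  // numeric columns are parsed by the caller
  // positional access row[0], row[1], ... still works through std::vector
}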
+ */
+class CsvRow : public std::vector<std::string> {
+public:
+  const CsvRow* header() const { return m_header; }
+  void setHeader(CsvRow* head) { m_header = head; }
+
+  using std::vector<std::string>::operator[];
+  std::string const& operator[](std::string col) const {
+    auto iter = std::find(m_header->begin(), m_header->end(), col);
+    if (iter == m_header->end()) {
+      std::stringstream message;
+      message << "No column named " << col;
+      throw std::invalid_argument(message.str());
+    }
+    auto idx = iter - m_header->begin();
+    return this->operator[](idx);
   }
-}
+
+private:
+  CsvRow* m_header {nullptr};  // not owned by this class
+};
+
+/** Class for parsing csv files */
+class Csv {
+public:
+  Csv(std::istream &in) : m_header(Csv::parseRow(in)), m_in(in) {
+    m_header.setHeader(&m_header);
+  }
+
+  Csv& operator>> (CsvRow& row) {
+    row = Csv::parseRow(m_in);
+    row.setHeader(&m_header);
+    return *this;
+  }
+
+  operator bool() const { return static_cast<bool>(m_in); }
+
+private:
+  static CsvRow parseRow(std::istream &in) {
+    std::string line;
+    std::getline(in, line);
+
+    std::stringstream lineStream(line);
+    std::string token;
+
+    // tokenize on ','
+    CsvRow row;
+    while(std::getline(lineStream, token, ',')) {
+      row.emplace_back(token);
+    }
+
+    // check for trailing comma with no data after it
+    if (!lineStream && token.empty()) {
+      row.emplace_back("");
+    }
+
+    return row;
+  }
+
+private:
+  CsvRow m_header;
+  std::istream &m_in;
+};
+
+} // end of unnamed namespace
+
+namespace flit {
 
 std::string FlitOptions::toString() {
   std::ostringstream messanger;
@@ -42,6 +98,8 @@ std::string FlitOptions::toString() {
     << " verbose: " << boolToString(this->verbose) << "\n"
     << " timing: " << boolToString(this->timing) << "\n"
     << " timingLoops: " << this->timingLoops << "\n"
+    << " output: " << this->output << "\n"
+    << " groundTruth: " << this->groundTruth << "\n"
     << " precision: " << this->precision << "\n"
     << " tests:\n";
   for (auto& test : this->tests) {
@@ -53,17 +111,18 @@ FlitOptions parseArguments(int argCount, char* argList[]) {
   FlitOptions options;
-  std::vector<std::string> helpOpts = { "-h", "--help" };
-  std::vector<std::string> verboseOpts = { "-v", "--verbose" };
-  std::vector<std::string> timingOpts = { "-t", "--timing" };
-  std::vector<std::string> loopsOpts = { "-l", "--timing-loops" };
-  std::vector<std::string> listTestsOpts = { "-L", "--list-tests" };
-  std::vector<std::string> precisionOpts = { "-p", "--precision" };
-  std::vector<std::string> outputOpts = { "-o", "--output" };
+  std::vector<std::string> helpOpts = { "-h", "--help" };
+  std::vector<std::string> verboseOpts = { "-v", "--verbose" };
+  std::vector<std::string> timingOpts = { "-t", "--timing" };
+  std::vector<std::string> loopsOpts = { "-l", "--timing-loops" };
+  std::vector<std::string> listTestsOpts = { "-L", "--list-tests" };
+  std::vector<std::string> precisionOpts = { "-p", "--precision" };
+  std::vector<std::string> outputOpts = { "-o", "--output" };
+  std::vector<std::string> groundTruthOpts = { "-g", "--ground-truth" };
   std::vector<std::string> allowedPrecisions = { "all", "float", "double", "long double" };
-  auto allowedTests = getKeys(flit::getTests());
+  auto allowedTests = getKeys(getTests());
   allowedTests.emplace_back("all");
   for (int i = 1; i < argCount; i++) {
     std::string current(argList[i]);
@@ -99,6 +158,11 @@ FlitOptions parseArguments(int argCount, char* argList[]) {
         throw ParseException(current + " requires an argument");
       }
       options.output = argList[++i];
+    } else if (isIn(groundTruthOpts, current)) {
+      if (i+1 == argCount) {
+        throw ParseException(current + " requires an argument");
+      }
+      options.groundTruth = argList[++i];
     } else {
       options.tests.push_back(current);
       if (!isIn(allowedTests, current)) {
@@ -108,7 +172,7 @@ FlitOptions parseArguments(int argCount, char* argList[]) {
   }
 
   if (options.tests.size() == 0 || isIn(options.tests, std::string("all"))) {
-    options.tests = getKeys(flit::getTests());
+    options.tests = getKeys(getTests());
   }
 
   return options;
@@ -152,6 +216,18 @@ std::string usage(std::string progName) {
     " standard output will still go to the terminal.\n"
     " The default behavior is to output to stdout.\n"
     "\n"
+    " -g INFILE, --ground-truth INFILE\n"
+    "     Use the following results file (usually generated\n"
+    "     using the --output option with the ground-truth\n"
+    "     compiled executable). This option allows the\n"
+    "     creation of data for the comparison column in the\n"
+    "     results. The test's compare() function is used.\n"
+    "\n"
+    "     Note: for tests outputting string data, the path\n"
+    "     may be a relative path from where you executed the\n"
+    "     ground-truth executable, in which case you will\n"
+    "     want to run this test from that same directory.\n"
+    "\n"
     " -p PRECISION, --precision PRECISION\n"
     "     Which precision to run. The choices are 'float',\n"
     "     'double', 'long double', and 'all'. The default\n"
@@ -160,3 +236,48 @@
   return messanger.str();
 }
 
+std::string readFile(const std::string &filename) {
+  std::ifstream filein(filename);
+  std::stringstream buffer;
+  buffer << filein.rdbuf();
+  return buffer.str();
+}
+
+std::vector<TestResult> parseResults(std::istream &in) {
+  std::vector<TestResult> results;
+
+  Csv csv(in);
+  CsvRow row;
+  while (csv >> row) {
+    auto nanosec = std::stol(row["nanosec"]);
+    Variant value;
+    std::string resultfile;
+    if (row["score"] != "NULL") {
+      // Convert score into a long double
+      value = as_float(flit::stouint128(row["score"]));
+    } else {
+      // Read string from the resultfile
+      assert(row["resultfile"] != "NULL");
+      resultfile = row["resultfile"];
+    }
+
+    results.emplace_back(row["name"], row["precision"], value, nanosec,
+                         resultfile);
+  }
+
+  return results;
+}
+
+std::string removeIdxFromName(const std::string &name) {
+  std::string pattern("_idx");  // followed by 1 or more digits
+  auto it = std::find_end(name.begin(), name.end(),
+                          pattern.begin(), pattern.end());
+  // assert that after the pattern, all the remaining chars are digits.
+  assert(it == name.end() ||
+         std::all_of(it + pattern.size(), name.end(), [](char c) {
+           return '0' <= c && c <= '9';
+         }));
+  return std::string(name.begin(), it);
+}
+
+} // end of namespace flit
diff --git a/src/flit.h b/src/flit.h
index 32061712..09aedf98 100644
--- a/src/flit.h
+++ b/src/flit.h
@@ -4,12 +4,12 @@
 #ifndef FLIT_H
 #define FLIT_H 0
 
-#include "flitHelpers.hpp"
-#include "TestBase.hpp"
+#include "flitHelpers.h"
+#include "TestBase.h"
 
 #ifdef __CUDA__
 //#include
-#include "CUHelpers.hpp"
+#include "CUHelpers.h"
 #endif
 
 #include
@@ -23,19 +23,34 @@
 #include
 
-void outputResults(const flit::ResultType& scores, std::ostream& out);
+// Define macros to use in the output
+// These can be overridden at compile time to insert compile-time information
 
-template <typename F>
-void runTestWithDefaultInput(flit::TestFactory* factory,
-                             flit::ResultType& totScores,
-                             bool shouldTime = true,
-                             int timingLoops = 1) {
-  auto test = factory->get<F>();
-  auto ip = test->getDefaultInput();
-  auto scores = test->run(ip, shouldTime, timingLoops);
-  totScores.insert(scores.begin(), scores.end());
-  flit::info_stream.flushout();
-}
+#ifndef FLIT_HOST
+#define FLIT_HOST "HOST"
+#endif // FLIT_HOST
+
+#ifndef FLIT_COMPILER
+#define FLIT_COMPILER "COMPILER"
+#endif // FLIT_COMPILER
+
+#ifndef FLIT_OPTL
+#define FLIT_OPTL "OPTL"
+#endif // FLIT_OPTL
+
+#ifndef FLIT_SWITCHES
+#define FLIT_SWITCHES "SWITCHES"
+#endif // FLIT_SWITCHES
+
+#ifndef FLIT_NULL
+#define FLIT_NULL "NULL"
+#endif // FLIT_NULL
+
+#ifndef FLIT_FILENAME
+#define FLIT_FILENAME "FILENAME"
+#endif // FLIT_FILENAME
+
+namespace flit {
 
 /** Command-line options */
 struct FlitOptions {
@@ -47,6 +62,7 @@ struct FlitOptions {
   std::string output = ""; // output file for results. default stdout
   bool timing = true; // should we run timing?
   int timingLoops = 1; // < 1 means to auto-determine the timing loops
+  std::string groundTruth = ""; // input for ground-truth comparison
 
   /** Give a string representation of this struct for printing purposes */
   std::string toString();
@@ -57,6 +73,128 @@ struct FlitOptions {
   }
 };
 
+/** Parse arguments */
+FlitOptions parseArguments(int argCount, char* argList[]);
+
+/** Returns the usage information as a string */
+std::string usage(std::string progName);
+
+/** Read file contents entirely into a string */
+std::string readFile(const std::string &filename);
+
+/** Parse the results file into a vector of results */
+std::vector<TestResult> parseResults(std::istream &in);
+
+/** Test names sometimes are postfixed with "_idx" + <number>.  Remove that postfix */
+std::string removeIdxFromName(const std::string &name);
+
+inline void outputResults (const std::vector<TestResult>& results,
+                           std::ostream& out)
+{
+  // Output the column headers
+  out << "name,"
+         "host,"
+         "compiler,"
+         "optl,"
+         "switches,"
+         "precision,"
+         "score,"
+         "score_d,"
+         "resultfile,"
+         "comparison,"
+         "comparison_d,"
+         "file,"
+         "nanosec"
+      << std::endl;
+  for(const auto& result: results){
+    out
+      << result.name() << ","       // test case name
+      << FLIT_HOST << ","           // hostname
+      << FLIT_COMPILER << ","       // compiler
+      << FLIT_OPTL << ","           // optimization level
+      << FLIT_SWITCHES << ","       // compiler flags
+      << result.precision() << ","  // precision
+      ;
+
+    if (result.result().type() == Variant::Type::LongDouble) {
+      out
+        << as_int(result.result().longDouble()) << ","  // score
+        << result.result().longDouble() << ","          // score_d
+        ;
+    } else {
+      out
+        << FLIT_NULL << ","  // score
+        << FLIT_NULL << ","  // score_d
+        ;
+    }
+
+    if (result.resultfile().empty()) {
+      out << FLIT_NULL << ",";            // resultfile
+    } else {
+      out << result.resultfile() << ",";  // resultfile
+    }
+
+    if (result.is_comparison_null()) {
+      out
+        << FLIT_NULL << ","  // comparison
+        << FLIT_NULL << ","  // comparison_d
+        ;
+    } else {
+      out
+        << as_int(result.comparison()) << ","  // comparison
+        << result.comparison() << ","          // comparison_d
+        ;
+    }
+
+    out
+      << FLIT_FILENAME << ","  // executable filename
+      << result.nanosecs()     // nanoseconds
+      << std::endl;
+  }
+}
+
+
+template <typename F>
+void runTestWithDefaultInput(TestFactory* factory,
+                             std::vector<TestResult>& totResults,
+                             const std::string &filebase = "",
+                             bool shouldTime = true,
+                             int timingLoops = 1) {
+  auto test = factory->get<F>();
+  auto ip = test->getDefaultInput();
+  auto results = test->run(ip, filebase, shouldTime, timingLoops);
+  totResults.insert(totResults.end(), results.begin(), results.end());
+  info_stream.flushout();
+}
+
+template <typename F>
+long double runComparison_impl(TestFactory* factory, const TestResult &gt,
+                               const TestResult &res) {
+  auto test = factory->get<F>();
+  if (!res.resultfile().empty()) {
+    assert(res.result().type() == Variant::Type::None);
+    assert( gt.result().type() == Variant::Type::None);
+    return test->compare(readFile(gt.resultfile()),
+                         readFile(res.resultfile()));
+  } else if (res.result().type() == Variant::Type::LongDouble) {
+    return test->compare(gt.result().longDouble(), res.result().longDouble());
+  } else { throw std::runtime_error("Unsupported variant type"); }
+}
+
+inline long double runComparison(TestFactory* factory, const TestResult &gt,
+                                 const TestResult &res) {
+  // TODO: after moving to lazy file load, load file contents at comparison
+  if (res.precision() == "f") {
+    return runComparison_impl<float>(factory, gt, res);
+  } else if (res.precision() == "d") {
+    return runComparison_impl<double>(factory, gt, res);
+  } else if (res.precision() == "e") {
+    return runComparison_impl<long double>(factory, gt, res);
+  } else { throw std::runtime_error("Unrecognized precision encountered"); }
+}
+
+
+
 /** Returns true if the element is in the container */
 template <typename Container, typename Element>
 bool isIn(Container c, Element e) {
@@ -81,12 +219,6 @@ class ParseException : std::exception {
   const std::string _message;
 };
 
-/** Parse arguments */
-FlitOptions parseArguments(int argCount, char* argList[]);
-
-/** Returns the usage information as a string */
-std::string usage(std::string progName);
-
 inline int runFlitTests(int argc, char* argv[]) {
   // Argument parsing
   FlitOptions options;
@@ -104,53 +236,97 @@ inline int runFlitTests(int argc, char* argv[]) {
   }
 
   if (options.listTests) {
-    for (auto& test : getKeys(flit::getTests())) {
+    for (auto& test : getKeys(getTests())) {
       std::cout << test << std::endl;
     }
     return 0;
   }
 
   if (options.verbose) {
-    flit::info_stream.show();
+    info_stream.show();
   }
 
   std::unique_ptr<std::ostream> stream_deleter;
   std::ostream *outstream = &std::cout;
+  std::string test_result_filebase(FLIT_FILENAME);
   if (!options.output.empty()) {
     stream_deleter.reset(new std::ofstream(options.output.c_str()));
     outstream = stream_deleter.get();
+    test_result_filebase = options.output;
   }
   std::cout.precision(1000); //set cout to print many decimal places
-  flit::info_stream.precision(1000);
+  info_stream.precision(1000);
 
 #ifdef __CUDA__
-  flit::initDeviceData();
+  initDeviceData();
 #endif
 
-  flit::ResultType scores;
-  auto testMap = flit::getTests();
+  std::vector<TestResult> results;
+  std::vector<TestResult> groundTruthResults;
+  if (!options.groundTruth.empty()) {
+    std::ifstream gtfile(options.groundTruth);
+    // TODO: only load file contents at time of comparison
+    groundTruthResults = parseResults(gtfile);
+  }
+
+  auto testMap = getTests();
   for (auto& testName : options.tests) {
     auto factory = testMap[testName];
     if (options.precision == "all" || options.precision == "float") {
-      runTestWithDefaultInput<float>(factory, scores, options.timing,
-                                     options.timingLoops);
+      runTestWithDefaultInput<float>(factory, results, test_result_filebase,
+                                     options.timing, options.timingLoops);
     }
     if (options.precision == "all" || options.precision == "double") {
-      runTestWithDefaultInput<double>(factory, scores, options.timing,
-                                      options.timingLoops);
+      runTestWithDefaultInput<double>(factory, results, test_result_filebase,
+                                      options.timing, options.timingLoops);
     }
     if (options.precision == "all" || options.precision == "long double") {
-      runTestWithDefaultInput<long double>(factory, scores, options.timing,
-                                           options.timingLoops);
+      runTestWithDefaultInput<long double>(
+          factory, results, test_result_filebase, options.timing,
+          options.timingLoops);
     }
+    // TODO: dump string result to file because we might run out of memory
   }
 #if defined(__CUDA__) && !defined(__CPUKERNEL__)
   cudaDeviceSynchronize();
 #endif
 
-  outputResults(scores, *outstream);
+  // Sort the results first by name then by precision
+  auto testComparator = [](const TestResult &a, const TestResult &b) {
+    if (a.name() != b.name()) {
+      return a.name() < b.name();
+    } else {
+      return a.precision() < b.precision();
+    }
+  };
+  std::sort(results.begin(), results.end(), testComparator);
+  std::sort(groundTruthResults.begin(), groundTruthResults.end(),
+            testComparator);
+
+  // Let's now run the ground-truth comparisons
+  if (groundTruthResults.size() > 0) {
+    for (auto& res : results) {
+      auto factory = testMap[removeIdxFromName(res.name())];
+      // Use binary search to find the first associated ground truth element
+      auto gtIter = std::lower_bound(groundTruthResults.begin(),
                                     groundTruthResults.end(), res,
+                                     testComparator);
+      // Compare the two results if the element was found
+      if (gtIter != groundTruthResults.end() &&
+          res.name() == (*gtIter).name() &&
+          res.precision() == (*gtIter).precision())
+      {
+        res.set_comparison(runComparison(factory, *gtIter, res));
+      }
+    }
+  }
+
+  // Create the main results output
+  outputResults(results, *outstream);
 
   return 0;
 }
 
+} // end of namespace flit
+
 #endif // FLIT_H
diff --git a/src/flitHelpers.cpp b/src/flitHelpers.cpp
index 3e37b7f5..f61d13cb 100644
--- a/src/flitHelpers.cpp
+++ b/src/flitHelpers.cpp
@@ -2,11 +2,13 @@
 // they utilize the watch data for sensitive points
 // of computation.
-#include "flitHelpers.hpp" +#include "flitHelpers.h" #include #include +#include + namespace flit { const std::vector @@ -73,29 +75,52 @@ const std::vector long_rands = setRandSeq(RAND_VECT_SI thread_local InfoStream info_stream; -std::mutex ostreamMutex; std::ostream& operator<<(std::ostream& os, const unsigned __int128 i){ - if(i == 0) os << 0; - else{ - std::ostringstream ost; - uint64_t hi = i >> 64; - uint64_t lo = (uint64_t)i; - ostreamMutex.lock(); - auto bflags = os.flags(); - os.flags(std::ios::hex & ~std::ios::showbase); - ost.flags(std::ios::hex & ~std::ios::showbase); - ost << lo; - os << "0x" << hi; - for(uint32_t x = 0; x < 16 - ost.str().length(); ++x){ - os << "0"; - } - os << ost.str(); - os.flags( bflags ); - ostreamMutex.unlock(); + std::ostringstream ost; + uint64_t hi = i >> 64; + uint64_t lo = (uint64_t)i; + auto bflags = os.flags(); + os.flags(std::ios::hex & ~std::ios::showbase); + ost.flags(std::ios::hex & ~std::ios::showbase); + ost << lo; + os << "0x" << hi; + for(uint32_t x = 0; x < 16 - ost.str().length(); ++x){ + os << "0"; } + os << ost.str(); + os.flags( bflags ); return os; } +unsigned __int128 stouint128(const std::string &str) { + uint64_t hi, lo; + // TODO: make this more efficient (maybe). + std::string copy; + if (str[0] == '0' && str[1] == 'x') { + copy = std::string(str.begin() + 2, str.end()); + } else { + copy = str; + } + + // Convert each section of 8-bytes (16 characters) + assert(copy.size() <= 32); + if (copy.size() <= 16) { + hi = 0; + lo = std::stoull(copy, nullptr, 16); + } else { + auto mid = copy.end() - 16; + hi = std::stoull(std::string(copy.begin(), mid), nullptr, 16); + lo = std::stoull(std::string(mid, copy.end()), nullptr, 16); + } + + // Combine the two 64-bit values. + unsigned __int128 val; + val = hi; + val = val << 64; + val += lo; + return val; +} + } // end of namespace flit diff --git a/src/flitHelpers.hpp b/src/flitHelpers.h similarity index 99% rename from src/flitHelpers.hpp rename to src/flitHelpers.h index 387dbaad..6502164d 100644 --- a/src/flitHelpers.hpp +++ b/src/flitHelpers.h @@ -5,8 +5,8 @@ #ifndef FLIT_HELPERS_HPP #define FLIT_HELPERS_HPP -#include "InfoStream.hpp" -#include "CUHelpers.hpp" +#include "InfoStream.h" +#include "CUHelpers.h" #include #include @@ -89,6 +89,7 @@ std::vector const & getRandSeq(); std::ostream& operator<<(std::ostream&, const unsigned __int128); +unsigned __int128 stouint128(const std::string &str); HOST_DEVICE inline float