From 1ced3f0c0f28bf8da80def5035e3818b0acece08 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Thu, 11 Jan 2018 17:45:10 -0700 Subject: [PATCH 01/20] Remove TestInput and CuTestInput However, the litmus tests do not compile yet. This is an incremental update --- data/tests/Empty.cpp | 14 +- documentation/writing-test-cases.md | 4 +- gensrc/testcase.py | 25 +- inputGen/groundtruth.cpp | 7 +- inputGen/testbed.cpp | 5 +- litmus-tests/disabled/SimpleCHull.cpp | 4 +- .../tests/DistributivityOfMultiplication.cpp | 36 +- litmus-tests/tests/DoHariGSBasic.cpp | 20 +- litmus-tests/tests/DoHariGSImproved.cpp | 20 +- litmus-tests/tests/DoMatrixMultSanity.cpp | 24 +- litmus-tests/tests/DoOrthoPerturbTest.cpp | 29 +- litmus-tests/tests/DoSimpleRotate90.cpp | 10 +- .../tests/DoSkewSymCPRotationTest.cpp | 16 +- litmus-tests/tests/FMACancel.cpp | 8 +- litmus-tests/tests/InliningProblem.cpp | 10 +- litmus-tests/tests/KahanSum.cpp | 17 +- litmus-tests/tests/Paranoia.cpp | 6 +- litmus-tests/tests/ReciprocalMath.cpp | 18 +- litmus-tests/tests/RotateAndUnrotate.cpp | 13 +- litmus-tests/tests/RotateFullCircle.cpp | 16 +- litmus-tests/tests/ShewchukSum.cpp | 17 +- litmus-tests/tests/SinInt.cpp | 10 +- litmus-tests/tests/TrianglePHeron.cpp | 25 +- litmus-tests/tests/TrianglePSylv.cpp | 14 +- litmus-tests/tests/langois.cpp | 12 +- litmus-tests/tests/tinys.cpp | 501 +++--------------- src/TestBase.h | 111 +--- 27 files changed, 269 insertions(+), 723 deletions(-) diff --git a/data/tests/Empty.cpp b/data/tests/Empty.cpp index 2e9bac13..d0bc4b49 100644 --- a/data/tests/Empty.cpp +++ b/data/tests/Empty.cpp @@ -4,7 +4,7 @@ template GLOBAL -void Empty_kernel(const flit::CuTestInput* tiList, double* results) { +void Empty_kernel(const T* const* tiList, double* results) { #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -27,7 +27,7 @@ class Empty : public flit::TestBase { /** Specify how many floating-point inputs your algorithm takes. * * Can be zero. 
If it is zero, then getDefaultInput should return an empty - * TestInput object which is as simple as "return {};" + * std::vector, which is as simple as "return {};" */ virtual size_t getInputsPerRun() override { return 1; } @@ -38,12 +38,10 @@ class Empty : public flit::TestBase { * time with getInputsPerRun() elements in ti.vals. * * If your algorithm takes no inputs, then you can simply return an empty - * TestInput object. It is as simple as "return {};". + * std::vector object. It is as simple as "return {};". */ - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { 1.0 }; - return ti; + virtual std::vector getDefaultInput() override { + return { 1.0 }; } /** Custom comparison methods @@ -108,7 +106,7 @@ class Empty : public flit::TestBase { * The value returned by run_impl is the same value used in compare() * implemented above. */ - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector &ti) override { return flit::Variant(); } diff --git a/documentation/writing-test-cases.md b/documentation/writing-test-cases.md index 799d7803..634a3f0b 100644 --- a/documentation/writing-test-cases.md +++ b/documentation/writing-test-cases.md @@ -39,7 +39,7 @@ empty `run_impl()` function like the following: ```c++ // Default implementation does nothing -virtual flit::Variant run_impl(const flit::TestInput& ti) override { +virtual flit::Variant run_impl(const std::vector& ti) override { FLIT_UNUSED(ti); return flit::Variant(); } @@ -53,7 +53,7 @@ meaningful: ```c++ template<> -flit::Variant MyTestClass::run_impl(const flit::TestInput& ti) { +flit::Variant MyTestClass::run_impl(const std::vector& ti) { // test logic here ... 
return something_meaningful; } diff --git a/gensrc/testcase.py b/gensrc/testcase.py index b4772de4..59443e6d 100644 --- a/gensrc/testcase.py +++ b/gensrc/testcase.py @@ -8,7 +8,8 @@ # - input_count: how many inputs the test will take # - default_input: populate ti.vals vector. # - vars_initialize: initialize scope variable for the test using ti.vals -# - cu_vars_initialize: initialize scope variables for the test in CUDA using tiList[idx].vals +# - cu_vars_initialize: initialize scope variables for the test in CUDA using +# tiList[idx].vals # - func_body: test body that is shared between cuda and non-cuda. Populate score template_string = ''' #include "flit.h" @@ -16,7 +17,7 @@ template GLOBAL void -{name}Kernel(const flit::CuTestInput* tiList, double* results) {{ +{name}Kernel(const T* const* tiList, double* results) {{ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -40,8 +41,8 @@ class {name} : public flit::TestBase {{ : flit::TestBase(std::move(id)) {{}} virtual size_t getInputsPerRun() override {{ return {input_count}; }} - virtual flit::TestInput getDefaultInput() override {{ - flit::TestInput ti; + virtual std::vector getDefaultInput() override {{ + std::vector ti; {default_input} @@ -54,11 +55,11 @@ class {name} : public flit::TestBase {{ }} virtual - flit::Variant run_impl(const flit::TestInput& ti) override {{ + flit::Variant run_impl(const std::vector& ti) override {{ T score = 0.0; flit::info_stream << id << ": Starting test with parameters" << std::endl; - for (T val : ti.vals) {{ + for (T val : ti) {{ flit::info_stream << id << ": " << val << std::endl; }} @@ -93,11 +94,11 @@ def __init__(self, name, default_input_vals): # setup the test self.default_input_lines = [ - 'ti.vals.push_back({0});'.format(x) for x in default_input_vals] + 'ti.push_back({0});'.format(x) for x in default_input_vals] self.vars_initialize_lines = [ - 'T in_{0} = ti.vals[{0}];'.format(i+1) for i in range(self.input_count)] + 'T in_{0} = 
ti[{0}];'.format(i+1) for i in range(self.input_count)] self.cu_vars_initialize_lines = [ - 'T in_{0} = tiList[idx].vals[{0}];'.format(i+1) for i in range(self.input_count)] + 'T in_{0} = tiList[idx][{0}];'.format(i+1) for i in range(self.input_count)] # Create an environment for the function body env = Environment({ @@ -109,9 +110,11 @@ def __init__(self, name, default_input_vals): self.func_body_lines = [] for i in range(10): var = Variable('e{0}'.format(i+1), 'T') - self.func_body_lines.append('{0} {1} = {2};'.format(var.type, var.name, random_expression(env, 3))) + self.func_body_lines.append('{0} {1} = {2};'.format(var.type, var.name, + random_expression(env, 3))) env[var.name] = var - self.func_body_lines.append('score = {0};'.format(random_expression(env, 4, vars_only=True))) + self.func_body_lines.append('score = {0};'.format(random_expression(env, 4, + vars_only=True))) def write(self, directory='.'): ''' diff --git a/inputGen/groundtruth.cpp b/inputGen/groundtruth.cpp index 2b94223b..44b1323f 100644 --- a/inputGen/groundtruth.cpp +++ b/inputGen/groundtruth.cpp @@ -8,16 +8,15 @@ namespace { runGroundtruth_impl(std::string testName, std::function randGen) { - using flit::TestInput; using flit::Vector; auto test = flit::getTests()[testName]->get(); - TestInput input = test->getDefaultInput(); - input.vals = Vector(test->getInputsPerRun(), randGen).getData(); + auto input = test->getDefaultInput(); + input = Vector(test->getInputsPerRun(), randGen).getData(); auto scores = test->run(input); // Return only the first score. 
Ignore the key - return { input.vals, std::get<0>(scores.begin()->second) }; + return { input, std::get<0>(scores.begin()->second) }; } } // end of unnamed namespace diff --git a/inputGen/testbed.cpp b/inputGen/testbed.cpp index a9b87a6f..4a65386c 100644 --- a/inputGen/testbed.cpp +++ b/inputGen/testbed.cpp @@ -10,13 +10,10 @@ namespace { runTestbed_impl(const std::string &testName, const std::vector &inputvals) { - using flit::TestInput; using flit::Vector; auto test = flit::getTests()[testName]->get(); - TestInput input = test->getDefaultInput(); - input.vals = inputvals; - auto scores = test->run(input); + auto scores = test->run(inputvals); // Return only the first score. Ignore the key return std::get<0>(scores.begin()->second); diff --git a/litmus-tests/disabled/SimpleCHull.cpp b/litmus-tests/disabled/SimpleCHull.cpp index 82ae6580..29e3e206 100644 --- a/litmus-tests/disabled/SimpleCHull.cpp +++ b/litmus-tests/disabled/SimpleCHull.cpp @@ -12,12 +12,12 @@ class SimpleCHull: public flit::TestBase { SimpleCHull(std::string id) : flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun(){ return 0; } - virtual flit::TestInput getDefaultInput(){ return {}; } + virtual std::vector getDefaultInput(){ return {}; } protected: virtual flit::KernelFunction* getKernel() { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) { + virtual flit::Variant run_impl(const std::vector& ti) { FLIT_UNUSED(ti); CHullEdges.clear(); PointList.clear(); diff --git a/litmus-tests/tests/DistributivityOfMultiplication.cpp b/litmus-tests/tests/DistributivityOfMultiplication.cpp index 2c0a3e0d..b578ac4e 100644 --- a/litmus-tests/tests/DistributivityOfMultiplication.cpp +++ b/litmus-tests/tests/DistributivityOfMultiplication.cpp @@ -13,15 +13,15 @@ template GLOBAL void -DistOfMultKernel(const flit::CuTestInput* tiList, double* results){ +DistOfMultKernel(const T* const* tiList, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; 
#else auto idx = 0; #endif - T a = tiList[idx].vals[0]; - T b = tiList[idx].vals[1]; - T c = tiList[idx].vals[2]; + T a = tiList[idx][0]; + T b = tiList[idx][1]; + T c = tiList[idx][2]; auto distributed = (a * c) + (b * c); results[idx] = distributed; @@ -34,17 +34,17 @@ class DistributivityOfMultiplication : public flit::TestBase { : flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun() override { return 3; } - virtual flit::TestInput getDefaultInput() override; + virtual std::vector getDefaultInput() override; protected: virtual flit::KernelFunction* getKernel() override { return DistOfMultKernel; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - T a = ti.vals[0]; - T b = ti.vals[1]; - T c = ti.vals[2]; + virtual flit::Variant run_impl(const std::vector& ti) override { + T a = ti[0]; + T b = ti[1]; + T c = ti[2]; auto distributed = (a * c) + (b * c); @@ -63,18 +63,16 @@ class DistributivityOfMultiplication : public flit::TestBase { // Define the inputs template<> -inline flit::TestInput +inline std::vector DistributivityOfMultiplication::getDefaultInput() { auto convert = [](uint32_t x) { return flit::as_float(x); }; - flit::TestInput ti; - // Put in canned values of previously found diverging inputs // These are entered as hex values to maintain the exact value instead of trying // to specify enough decimal digits to get the same floating-point value - ti.vals = { + std::vector ti = { convert(0x6b8b4567), convert(0x65ba0c1e), convert(0x49e753d2), @@ -124,18 +122,16 @@ DistributivityOfMultiplication::getDefaultInput() { } template<> -inline flit::TestInput +inline std::vector DistributivityOfMultiplication::getDefaultInput() { auto convert = [](uint64_t x) { return flit::as_float(x); }; - flit::TestInput ti; - // Put in canned values of previously found diverging inputs // These are entered as hex values to maintain the exact value instead of trying // to specify enough decimal digits to get the same floating-point value - 
ti.vals = { + std::vector ti = { convert(0x7712d691ff8158c1), convert(0x7a71b704fdd6a840), convert(0x019b84dddaba0d31), @@ -181,7 +177,7 @@ DistributivityOfMultiplication::getDefaultInput() { } template<> -inline flit::TestInput +inline std::vector DistributivityOfMultiplication::getDefaultInput() { // Here we are assuming that long double represents 80 bits auto convert = [](uint64_t left_half, uint64_t right_half) { @@ -191,12 +187,10 @@ DistributivityOfMultiplication::getDefaultInput() { return flit::as_float(val); }; - flit::TestInput ti; - // Put in canned values of previously found diverging inputs // These are entered as hex values to maintain the exact value instead of trying // to specify enough decimal digits to get the same floating-point value - ti.vals = { + std::vector ti = { convert(0x2b99, 0x2bb4d082ca2e7ec7), // 3.586714e-1573 convert(0x725a, 0x14c0a0cd445b52d5), // 6.131032e+3879 convert(0x075d, 0x0bc91b713fc2fba5), // 4.278225e-4366 diff --git a/litmus-tests/tests/DoHariGSBasic.cpp b/litmus-tests/tests/DoHariGSBasic.cpp index ac546d65..9ed54bff 100644 --- a/litmus-tests/tests/DoHariGSBasic.cpp +++ b/litmus-tests/tests/DoHariGSBasic.cpp @@ -7,14 +7,14 @@ template GLOBAL void -DoHGSBTestKernel(const flit::CuTestInput* tiList, double* result){ +DoHGSBTestKernel(const T* const* tiList, double* result){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - const T* vals = tiList[idx].vals; + const T* vals = tiList[idx]; flit::VectorCU a(vals, 3); flit::VectorCU b(vals + 3, 3); flit::VectorCU c(vals + 6, 3); @@ -39,20 +39,20 @@ class DoHariGSBasic: public flit::TestBase { DoHariGSBasic(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 9; } - virtual flit::TestInput getDefaultInput() override; + virtual std::vector getDefaultInput() override; protected: virtual flit::KernelFunction* getKernel() override { return DoHGSBTestKernel; } - virtual flit::Variant 
run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { using flit::operator<<; long double score = 0.0; //matrix = {a, b, c}; - flit::Vector a = {ti.vals[0], ti.vals[1], ti.vals[2]}; - flit::Vector b = {ti.vals[3], ti.vals[4], ti.vals[5]}; - flit::Vector c = {ti.vals[6], ti.vals[7], ti.vals[8]}; + flit::Vector a = {ti[0], ti[1], ti[2]}; + flit::Vector b = {ti[3], ti[4], ti[5]}; + flit::Vector c = {ti[6], ti[7], ti[8]}; auto r1 = a.getUnitVector(); //crit = r1[0]; @@ -99,13 +99,11 @@ namespace { } // end of unnamed namespace template -flit::TestInput DoHariGSBasic::getDefaultInput() { +std::vector DoHariGSBasic::getDefaultInput() { T e = getSmallValue(); - flit::TestInput ti; - // Just one test - ti.vals = { + std::vector ti = { 1, e, e, // vec a 1, e, 0, // vec b 1, 0, e, // vec c diff --git a/litmus-tests/tests/DoHariGSImproved.cpp b/litmus-tests/tests/DoHariGSImproved.cpp index 59831214..44b324ea 100644 --- a/litmus-tests/tests/DoHariGSImproved.cpp +++ b/litmus-tests/tests/DoHariGSImproved.cpp @@ -6,13 +6,13 @@ template GLOBAL void -DoHGSITestKernel(const flit::CuTestInput* tiList, double* results){ +DoHGSITestKernel(const T* const* tiList, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - const T* vals = tiList[idx].vals; + const T* vals = tiList[idx]; flit::VectorCU a(vals, 3); flit::VectorCU b(vals + 3, 3); flit::VectorCU c(vals + 6, 3); @@ -37,17 +37,17 @@ class DoHariGSImproved: public flit::TestBase { DoHariGSImproved(std::string id) : flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun() override { return 9; } - virtual flit::TestInput getDefaultInput() override; + virtual std::vector getDefaultInput() override; protected: virtual flit::KernelFunction* getKernel() override { return DoHGSITestKernel; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& 
ti) override { long double score = 0.0; //matrix = {a, b, c}; - flit::Vector a = {ti.vals[0], ti.vals[1], ti.vals[2]}; - flit::Vector b = {ti.vals[3], ti.vals[4], ti.vals[5]}; - flit::Vector c = {ti.vals[6], ti.vals[7], ti.vals[8]}; + flit::Vector a = {ti[0], ti[1], ti[2]}; + flit::Vector b = {ti[3], ti[4], ti[5]}; + flit::Vector c = {ti[6], ti[7], ti[8]}; auto r1 = a.getUnitVector(); auto r2 = (b - r1 * (b ^ r1)).getUnitVector(); @@ -85,13 +85,11 @@ namespace { } // end of unnamed namespace template -flit::TestInput DoHariGSImproved::getDefaultInput() { +std::vector DoHariGSImproved::getDefaultInput() { T e = getSmallValue(); - flit::TestInput ti; - // Just one test - ti.vals = { + std::vector ti = { 1, e, e, // vec a 1, e, 0, // vec b 1, 0, e, // vec c diff --git a/litmus-tests/tests/DoMatrixMultSanity.cpp b/litmus-tests/tests/DoMatrixMultSanity.cpp index 3c1f14d2..9eeda6e6 100644 --- a/litmus-tests/tests/DoMatrixMultSanity.cpp +++ b/litmus-tests/tests/DoMatrixMultSanity.cpp @@ -9,14 +9,14 @@ template GLOBAL void -DoMatrixMultSanityKernel(const flit::CuTestInput* tiList, double* results){ +DoMatrixMultSanityKernel(const T* const* tiList, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif auto ti = tiList[idx]; - auto b = flit::VectorCU(ti.vals, ti.length); + auto b = flit::VectorCU(ti, ti.length); auto c = flit::MatrixCU::Identity(ti.length) * b; results[idx] = c.L1Distance(b); } @@ -28,22 +28,18 @@ class DoMatrixMultSanity: public flit::TestBase { virtual size_t getInputsPerRun() override { return 16; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.highestDim = getInputsPerRun(); - ti.min = -6; - ti.max = 6; - ti.vals = std::move( - flit::Vector::getRandomVector(getInputsPerRun()).getData()); - return ti; + virtual std::vector getDefaultInput() override { + return flit::Vector::getRandomVector(getInputsPerRun()).getData(); } protected: - virtual flit::KernelFunction* 
getKernel() override { return DoMatrixMultSanityKernel; } + virtual flit::KernelFunction* getKernel() override { + return DoMatrixMultSanityKernel; + } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto dim = ti.vals.size(); - flit::Vector b(ti.vals); + virtual flit::Variant run_impl(const std::vector& ti) override { + auto dim = ti.size(); + flit::Vector b(ti); auto c = flit::Matrix::Identity(dim) * b; bool eq = (c == b); flit::info_stream << id << ": Product is: " << c << std::endl; diff --git a/litmus-tests/tests/DoOrthoPerturbTest.cpp b/litmus-tests/tests/DoOrthoPerturbTest.cpp index 36cd7c75..4fe1d8af 100644 --- a/litmus-tests/tests/DoOrthoPerturbTest.cpp +++ b/litmus-tests/tests/DoOrthoPerturbTest.cpp @@ -5,10 +5,15 @@ #include +namespace { + const int iters = 200; + const int ulp_inc = 1; +} + template GLOBAL void -DoOPTKernel(const flit::CuTestInput* tiList, double* results){ +DoOPTKernel(const T* const* tiList, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else @@ -16,14 +21,12 @@ DoOPTKernel(const flit::CuTestInput* tiList, double* results){ #endif auto ti = tiList[idx]; - auto iters = ti.iters; - auto dim = ti.highestDim; double score = 0.0; cuvector orthoCount(dim, 0.0); // we use a double literal above as a workaround for Intel 15-16 compiler // bug: // https://software.intel.com/en-us/forums/intel-c-compiler/topic/565143 - flit::VectorCU a(ti.vals, ti.length); + flit::VectorCU a(ti, ti.size()); flit::VectorCU b = a.genOrthoVector(); T backup; @@ -59,34 +62,26 @@ class DoOrthoPerturbTest : public flit::TestBase { DoOrthoPerturbTest(std::string id) : flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun() override { return 16; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.iters = 200; - ti.ulp_inc = 1; - + virtual std::vector getDefaultInput() override { auto dim = getInputsPerRun(); - ti.highestDim = dim; - ti.vals = std::vector(dim); - 
for(decltype(dim) x = 0; x < dim; ++x) ti.vals[x] = static_cast(1 << x); - + ti = std::vector(dim); + for(decltype(dim) x = 0; x < dim; ++x) ti[x] = static_cast(1 << x); return ti; } protected: virtual flit::KernelFunction* getKernel() override { return DoOPTKernel; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { using flit::operator<<; - auto iters = ti.iters; auto dim = getInputsPerRun(); - auto ulp_inc = ti.ulp_inc; long double score = 0.0; std::vector orthoCount(dim, 0.0); // we use a double literal above as a workaround for Intel 15-16 compiler // bug: // https://software.intel.com/en-us/forums/intel-c-compiler/topic/565143 - flit::Vector a(ti.vals); + flit::Vector a(ti); flit::Vector b = a.genOrthoVector(); T backup; diff --git a/litmus-tests/tests/DoSimpleRotate90.cpp b/litmus-tests/tests/DoSimpleRotate90.cpp index 25b81a8d..86b96f6a 100644 --- a/litmus-tests/tests/DoSimpleRotate90.cpp +++ b/litmus-tests/tests/DoSimpleRotate90.cpp @@ -30,17 +30,15 @@ class DoSimpleRotate90: public flit::TestBase { DoSimpleRotate90(std::string id):flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun() override { return 3; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { 1, 1, 1 }; - return ti; + virtual std::vector getDefaultInput() override { + return { 1, 1, 1 }; } protected: virtual flit::KernelFunction* getKernel() override { return DoSR90Kernel; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - flit::Vector A(ti.vals); + virtual flit::Variant run_impl(const std::vector& ti) override { + flit::Vector A(ti); flit::Vector expected = {-A[1], A[0], A[2]}; flit::info_stream << "Rotating A: " << A << ", 1/2 PI radians" << std::endl; A = A.rotateAboutZ_3d(M_PI/2); diff --git a/litmus-tests/tests/DoSkewSymCPRotationTest.cpp b/litmus-tests/tests/DoSkewSymCPRotationTest.cpp index b70d40be..60f35f21 100644 --- 
a/litmus-tests/tests/DoSkewSymCPRotationTest.cpp +++ b/litmus-tests/tests/DoSkewSymCPRotationTest.cpp @@ -33,24 +33,18 @@ class DoSkewSymCPRotationTest: public flit::TestBase { : flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun() override { return 6; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.min = -6; - ti.max = 6; - auto n = getInputsPerRun(); - ti.highestDim = n; - ti.vals = flit::Vector::getRandomVector(n).getData(); - return ti; + virtual std::vector getDefaultInput() override { + return flit::Vector::getRandomVector(n).getData(); } protected: virtual flit::KernelFunction* getKernel() override { return DoSkewSCPRKernel;} - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { flit::info_stream << "entered " << id << std::endl; long double L1Score = 0.0; - flit::Vector A = { ti.vals[0], ti.vals[1], ti.vals[2] }; - flit::Vector B = { ti.vals[3], ti.vals[4], ti.vals[5] }; + flit::Vector A = { ti[0], ti[1], ti[2] }; + flit::Vector B = { ti[3], ti[4], ti[5] }; A = A.getUnitVector(); B = B.getUnitVector(); flit::info_stream << "A (unit) is: " << std::endl << A << std::endl; diff --git a/litmus-tests/tests/FMACancel.cpp b/litmus-tests/tests/FMACancel.cpp index 72b95804..a43fd1ed 100644 --- a/litmus-tests/tests/FMACancel.cpp +++ b/litmus-tests/tests/FMACancel.cpp @@ -11,14 +11,12 @@ class FMACancel : public flit::TestBase { virtual size_t getInputsPerRun() override { return 2; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { .1, 1.1e5 }; - return ti; + virtual std::vector getDefaultInput() override { + return { .1, 1.1e5 }; } protected: - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { const T a = ti.vals[0]; const T b = ti.vals[1]; const T c = a; diff --git a/litmus-tests/tests/InliningProblem.cpp 
b/litmus-tests/tests/InliningProblem.cpp index 77d582f3..f03c27a6 100644 --- a/litmus-tests/tests/InliningProblem.cpp +++ b/litmus-tests/tests/InliningProblem.cpp @@ -11,10 +11,8 @@ class InliningProblem : public flit::TestBase { virtual size_t getInputsPerRun() override { return 1; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { .1, 1.1e3, -.1, -1.1e3, 1/3 }; - return ti; + virtual std::vector getDefaultInput() override { + return { .1, 1.1e3, -.1, -1.1e3, 1/3 }; } protected: @@ -23,8 +21,8 @@ class InliningProblem : public flit::TestBase { const T x_again = -nx; return x_again; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - T a = ti.vals[0]; + virtual flit::Variant run_impl(const std::vector& ti) override { + T a = ti[0]; T also_a = identity(a); const T score = std::sqrt(a) * std::sqrt(also_a); diff --git a/litmus-tests/tests/KahanSum.cpp b/litmus-tests/tests/KahanSum.cpp index 3c3ecbbc..cd19b38f 100644 --- a/litmus-tests/tests/KahanSum.cpp +++ b/litmus-tests/tests/KahanSum.cpp @@ -16,14 +16,14 @@ class KahanSum : public flit::TestBase { KahanSum(std::string id) : flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun() override { return 10000; } - virtual flit::TestInput getDefaultInput() override; + virtual std::vector getDefaultInput() override; protected: - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { Kahan kahan; Shewchuk chuk; T naive = 0.0; - for (auto val : ti.vals) { + for (auto val : ti) { chuk.add(val); kahan.add(val); naive += val; @@ -54,16 +54,15 @@ namespace { } // end of unnamed namespace template -flit::TestInput KahanSum::getDefaultInput() { - flit::TestInput ti; +std::vector KahanSum::getDefaultInput() { auto dim = getInputsPerRun(); - ti.highestDim = dim; - ti.vals = std::vector(dim); + std::vector ti(dim); auto toRepeat = getToRepeat(); for (decltype(dim) i = 0, j = 0; i < 
dim; - i++, j = (j+1) % toRepeat.size()) { - ti.vals[i] = toRepeat[j]; + i++, j = (j+1) % toRepeat.size()) + { + ti[i] = toRepeat[j]; } return ti; } diff --git a/litmus-tests/tests/Paranoia.cpp b/litmus-tests/tests/Paranoia.cpp index 3b5c2b9e..471742ca 100644 --- a/litmus-tests/tests/Paranoia.cpp +++ b/litmus-tests/tests/Paranoia.cpp @@ -209,10 +209,10 @@ class Paranoia : public flit::TestBase { Paranoia(std::string id) : flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun() override { return 0; } - virtual flit::TestInput getDefaultInput() override { return {}; } + virtual std::vector getDefaultInput() override { return {}; } protected: - virtual flit::Variant run_impl(const flit::TestInput& ti) override; + virtual flit::Variant run_impl(const std::vector& ti) override; void setTimeout(long millis); // starts the timer for checkTimeout() void checkTimeout(); // throws TimeoutError if timer from setTimeout has expired @@ -320,7 +320,7 @@ void sigfpe(int i) } template -flit::Variant Paranoia::run_impl(const flit::TestInput& ti) +flit::Variant Paranoia::run_impl(const std::vector& ti) { FLIT_UNUSED(ti); int timeoutMillis = 1000; diff --git a/litmus-tests/tests/ReciprocalMath.cpp b/litmus-tests/tests/ReciprocalMath.cpp index 3687dd44..b3e81233 100644 --- a/litmus-tests/tests/ReciprocalMath.cpp +++ b/litmus-tests/tests/ReciprocalMath.cpp @@ -11,19 +11,17 @@ class ReciprocalMath : public flit::TestBase { virtual size_t getInputsPerRun() override { return 5; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { .1, 1.1e3, -.1, -1.1e3, 1/3 }; - return ti; + virtual std::vector getDefaultInput() override { + return { .1, 1.1e3, -.1, -1.1e3, 1/3 }; } protected: - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - T a = ti.vals[0]; - T b = ti.vals[1]; - T c = ti.vals[2]; - T d = ti.vals[3]; - T m = ti.vals[4]; + virtual flit::Variant run_impl(const std::vector& ti) override { + T a = ti[0]; + T b = ti[1]; 
+ T c = ti[2]; + T d = ti[3]; + T m = ti[4]; a = a/m; b = b/m; diff --git a/litmus-tests/tests/RotateAndUnrotate.cpp b/litmus-tests/tests/RotateAndUnrotate.cpp index 86987324..516d951f 100644 --- a/litmus-tests/tests/RotateAndUnrotate.cpp +++ b/litmus-tests/tests/RotateAndUnrotate.cpp @@ -1,6 +1,7 @@ #include #include +#include #include @@ -28,20 +29,16 @@ class RotateAndUnrotate: public flit::TestBase { RotateAndUnrotate(std::string id) : flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun() override { return 3; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.min = -6; - ti.max = 6; - ti.vals = flit::Vector::getRandomVector(3).getData(); - return ti; + virtual std::vector getDefaultInput() override { + return flit::Vector::getRandomVector(3).getData(); } protected: virtual flit::KernelFunction* getKernel() override { return RaUKern; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { auto theta = M_PI; - auto A = flit::Vector(ti.vals); + auto A = flit::Vector(ti); auto orig = A; flit::info_stream << "Rotate and Unrotate by " << theta << " radians, A is: " << A << std::endl; A.rotateAboutZ_3d(theta); diff --git a/litmus-tests/tests/RotateFullCircle.cpp b/litmus-tests/tests/RotateFullCircle.cpp index 4361d063..2bbf05e8 100644 --- a/litmus-tests/tests/RotateFullCircle.cpp +++ b/litmus-tests/tests/RotateFullCircle.cpp @@ -30,23 +30,17 @@ class RotateFullCircle: public flit::TestBase { RotateFullCircle(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 3; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.min = -6; - ti.max = 6; - ti.iters = 200; + virtual std::vector getDefaultInput() override { auto n = getInputsPerRun(); - ti.highestDim = n; - ti.vals = flit::Vector::getRandomVector(n).getData(); - return ti; + return 
flit::Vector::getRandomVector(n).getData(); } protected: virtual flit::KernelFunction* getKernel() override {return RFCKern; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto n = ti.iters; - flit::Vector A = flit::Vector(ti.vals); + virtual flit::Variant run_impl(const std::vector& ti) override { + auto n = 200; + flit::Vector A = flit::Vector(ti); auto orig = A; T theta = 2 * M_PI / n; flit::info_stream << "Rotate full circle in " << n << " increments, A is: " << A << std::endl; diff --git a/litmus-tests/tests/ShewchukSum.cpp b/litmus-tests/tests/ShewchukSum.cpp index 009fce60..fbe064aa 100644 --- a/litmus-tests/tests/ShewchukSum.cpp +++ b/litmus-tests/tests/ShewchukSum.cpp @@ -12,13 +12,13 @@ class ShewchukSum : public flit::TestBase { ShewchukSum(std::string id) : flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun() override { return 1000; } - virtual flit::TestInput getDefaultInput() override; + virtual std::vector getDefaultInput() override; protected: - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { Shewchuk chuk; T naive = 0.0; - for (auto val : ti.vals) { + for (auto val : ti) { chuk.add(val); naive += val; flit::info_stream @@ -49,16 +49,15 @@ namespace { } // end of unnamed namespace template -flit::TestInput ShewchukSum::getDefaultInput() { - flit::TestInput ti; +std::vector ShewchukSum::getDefaultInput() { auto dim = getInputsPerRun(); - ti.highestDim = dim; - ti.vals = std::vector(dim); + std::vector ti(dim); auto toRepeat = getToRepeat(); for (decltype(dim) i = 0, j = 0; i < dim; - i++, j = (j+1) % toRepeat.size()) { - ti.vals[i] = toRepeat[j]; + i++, j = (j+1) % toRepeat.size()) + { + ti[i] = toRepeat[j]; } return ti; } diff --git a/litmus-tests/tests/SinInt.cpp b/litmus-tests/tests/SinInt.cpp index f12a9acd..dd3a49d1 100644 --- a/litmus-tests/tests/SinInt.cpp +++ b/litmus-tests/tests/SinInt.cpp @@ -13,17 +13,15 @@ class 
SinInt : public flit::TestBase { virtual size_t getInputsPerRun() override { return 1; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; + virtual std::vector getDefaultInput() override { const T pi = 3.141592653589793238462643383279502884197169399375105820974944592307816406286208998L; - ti.vals = { pi }; - return ti; + return { pi }; } protected: - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { const int zero = (rand() % 10) / 99; - const T val = ti.vals[0]; + const T val = ti[0]; const T score = std::sin(val + zero) / std::sin(val); flit::info_stream << id << ": score = " << score << std::endl; flit::info_stream << id << ": score - 1.0 = " << score - 1.0 << std::endl; diff --git a/litmus-tests/tests/TrianglePHeron.cpp b/litmus-tests/tests/TrianglePHeron.cpp index a0246ec5..44beb18e 100644 --- a/litmus-tests/tests/TrianglePHeron.cpp +++ b/litmus-tests/tests/TrianglePHeron.cpp @@ -4,6 +4,10 @@ #include +namespace { + int g_iters = 200; +} + template DEVICE T getCArea(const T a, @@ -24,18 +28,18 @@ T getArea(const T a, template GLOBAL void -TrianglePHKern(const flit::CuTestInput* tiList, double* results) { +TrianglePHKern(const T* const* tiList, size_t n, double* results) { #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - auto ti = tiList[idx]; - T maxval = tiList[idx].vals[0]; + T* start = tiList + idx * n; + T maxval = start[0]; T a = maxval; T b = maxval; T c = maxval * flit::csqrt((T)2.0); - const T delta = maxval / (T)ti.iters; + const T delta = maxval / T(g_iters); const T checkVal = (T)0.5 * b * a; double score = 0.0; @@ -57,23 +61,20 @@ class TrianglePHeron: public flit::TestBase { TrianglePHeron(std::string id) : flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun() override { return 1; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.iters = 200; - 
ti.vals = { 6.0 }; - return ti; + virtual std::vector getDefaultInput() override { + return { 6.0 }; } protected: virtual flit::KernelFunction* getKernel() override {return TrianglePHKern; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - T maxval = ti.vals[0]; + virtual flit::Variant run_impl(const std::vector& ti) override { + T maxval = ti[0]; // start as a right triangle T a = maxval; T b = maxval; T c = maxval * std::sqrt(2); - const T delta = maxval / (T)ti.iters; + const T delta = maxval / T(g_iters); // 1/2 b*h = A // all perturbations will have the same base and height (plus some FP noise) diff --git a/litmus-tests/tests/TrianglePSylv.cpp b/litmus-tests/tests/TrianglePSylv.cpp index 40fb51a7..d614d202 100644 --- a/litmus-tests/tests/TrianglePSylv.cpp +++ b/litmus-tests/tests/TrianglePSylv.cpp @@ -55,23 +55,21 @@ class TrianglePSylv: public flit::TestBase { TrianglePSylv(std::string id) : flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun() override { return 1; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.iters = 200; - ti.vals = { 6.0 }; - return ti; + virtual std::vector getDefaultInput() override { + return { 6.0 }; } protected: virtual flit::KernelFunction* getKernel() override {return TrianglePSKern; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - T maxval = ti.vals[0]; + virtual flit::Variant run_impl(const std::vector& ti) override { + T maxval = ti[0]; + auto iters = 200; // start as a right triangle T a = maxval; T b = maxval; T c = maxval * std::sqrt(2); - const T delta = maxval / (T)ti.iters; + const T delta = maxval / T(iters); // 1/2 b*h = A diff --git a/litmus-tests/tests/langois.cpp b/litmus-tests/tests/langois.cpp index 54bd748c..bfdbf972 100644 --- a/litmus-tests/tests/langois.cpp +++ b/litmus-tests/tests/langois.cpp @@ -80,12 +80,12 @@ class langDotFMA: public flit::TestBase { langDotFMA(std::string id) : flit::TestBase(std::move(id)) {} 
virtual size_t getInputsPerRun() override { return 0; } - virtual flit::TestInput getDefaultInput() override { return {}; } + virtual std::vector getDefaultInput() override { return {}; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { FLIT_UNUSED(ti); using stype = typename std::vector::size_type; stype size = 16; @@ -115,12 +115,12 @@ class langCompDotFMA: public flit::TestBase { langCompDotFMA(std::string id) : flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun() override { return 0; } - virtual flit::TestInput getDefaultInput() override { return {}; } + virtual std::vector getDefaultInput() override { return {}; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { FLIT_UNUSED(ti); using stype = typename std::vector::size_type; stype size = 16; @@ -153,12 +153,12 @@ class langCompDot: public flit::TestBase { langCompDot(std::string id) : flit::TestBase(std::move(id)) {} virtual size_t getInputsPerRun() override { return 0; } - virtual flit::TestInput getDefaultInput() override { return {}; } + virtual std::vector getDefaultInput() override { return {}; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { FLIT_UNUSED(ti); using stype = typename std::vector::size_type; stype size = 16; diff --git a/litmus-tests/tests/tinys.cpp b/litmus-tests/tests/tinys.cpp index 86432060..81d1b09d 100644 --- a/litmus-tests/tests/tinys.cpp +++ b/litmus-tests/tests/tinys.cpp @@ -7,34 +7,20 @@ #include -// template -// GLOBAL -// void -// FtoDecToFKern(const 
flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class FtoDecToF: public flit::TestBase { public: FtoDecToF(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 1; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = {std::nextafter(T(0.0), T(1.0))}; - return ti; + virtual std::vector getDefaultInput() override { + return {std::nextafter(T(0.0), T(1.0))}; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { std::numeric_limits nlim; // from https://en.wikipedia.org/wiki/IEEE_floating_point uint16_t ddigs = nlim.digits * std::log10(2) + 1; @@ -49,67 +35,39 @@ class FtoDecToF: public flit::TestBase { using flit::TestBase::id; }; - REGISTER_TYPE(FtoDecToF) -// template -// GLOBAL -// void -// subnormalKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class subnormal: public flit::TestBase { public: subnormal(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 1; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = {std::nextafter(T(0.0), T(1.0))}; - return ti; + virtual std::vector getDefaultInput() override { + return {std::nextafter(T(0.0), T(1.0))}; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { return ti.vals[0] - ti.vals[0] / 2; } using 
flit::TestBase::id; }; - REGISTER_TYPE(subnormal) -// template -// GLOBAL -// void -// dotProdKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class dotProd: public flit::TestBase { public: dotProd(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 0; } - virtual flit::TestInput getDefaultInput() override { return {}; } + virtual std::vector getDefaultInput() override { return {}; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { FLIT_UNUSED(ti); auto size = 16; @@ -127,30 +85,18 @@ class dotProd: public flit::TestBase { }; REGISTER_TYPE(dotProd) -// template -// GLOBAL -// void -// simpleReductionKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class simpleReduction: public flit::TestBase { public: simpleReduction(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 0; } - virtual flit::TestInput getDefaultInput() override { return {}; } + virtual std::vector getDefaultInput() override { return {}; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { FLIT_UNUSED(ti); auto vals = flit::getRandSeq(); auto sublen = vals.size() / 4 - 1; @@ -170,38 +116,23 @@ class simpleReduction: public flit::TestBase { }; REGISTER_TYPE(simpleReduction) -//This test adds L1 + L2 + s, where L1 & L2 are large, s small - -// template -// 
GLOBAL -// void -// addTOLKernel(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class addTOL : public flit::TestBase { public: addTOL(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 3; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; + virtual std::vector getDefaultInput() override { std::numeric_limits nls; auto man_bits = nls.digits; std::mt19937 gen(1); std::uniform_int_distribution<> dis(man_bits + 1, nls.max_exponent); - //generate the range of offsets, and then generate the - //mantissa bits for each of the three inputs + // generate the range of offsets, and then generate the + // mantissa bits for each of the three inputs auto L1e = dis(gen); //L1 exponent - //for the ldexp function we're using, it takes an unbiased exponent and - //there is no implied 1 MSB for the mantissa / significand + // for the ldexp function we're using, it takes an unbiased exponent and + // there is no implied 1 MSB for the mantissa / significand T zero = 0.0; auto L1m = flit::as_int(zero); auto L2m = flit::as_int(zero); @@ -211,114 +142,71 @@ class addTOL : public flit::TestBase { L2m &= (gen() & 1) << i; sm &= (gen() & 1) << i; } - ti.vals = { + return { std::ldexp(flit::as_float(L1m), L1e), std::ldexp(flit::as_float(L2m), L1e - 1), std::ldexp(flit::as_float(sm), L1e - man_bits) }; - return ti; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto res = ti.vals[0] + ti.vals[1] + ti.vals[2]; + virtual flit::Variant run_impl(const std::vector& ti) override { + auto res = ti[0] + ti[1] + ti[2]; return res; } using flit::TestBase::id; }; - -//the basic idea of this test is A(I) + B + TOL, where A & B are large, -// and TOL is tiny. 
REGISTER_TYPE(addTOL) -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class addSub: public flit::TestBase { public: addSub(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 1; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = {T(1.0)}; - return ti; + virtual std::vector getDefaultInput() override { + return { T(1.0) }; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { std::numeric_limits nls; auto man_bits = nls.digits; auto big = std::pow(2, (T)man_bits - 1); - auto res = (ti.vals[0] + big) - big; + auto res = (ti[0] + big) - big; return res; } using flit::TestBase::id; }; REGISTER_TYPE(addSub) -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class divc: public flit::TestBase { public: divc(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 2; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0], flit::getRandSeq()[1], }; - return ti; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto res = ti.vals[0] / ti.vals[1]; + virtual flit::Variant run_impl(const std::vector& ti) override { + auto res = ti[0] / ti[1]; return 
res; } using flit::TestBase::id; }; REGISTER_TYPE(divc) -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } template class zeroMinusX: public flit::TestBase { @@ -326,85 +214,55 @@ class zeroMinusX: public flit::TestBase { zeroMinusX(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 1; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { flit::getRandSeq()[0] }; - return ti; + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0] }; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto res = T(0.0) - ti.vals[0]; + virtual flit::Variant run_impl(const std::vector& ti) override { + auto res = T(0.0) - ti[0]; return res; } using flit::TestBase::id; }; REGISTER_TYPE(zeroMinusX) -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class xMinusZero: public flit::TestBase { public: xMinusZero(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 1; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { flit::getRandSeq()[0] }; - return ti; + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0] }; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto res = ti.vals[0] - (T)0.0; + virtual flit::Variant run_impl(const std::vector& ti) override { + 
auto res = ti[0] - T(0.0); return res; } using flit::TestBase::id; }; REGISTER_TYPE(xMinusZero) -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class zeroDivX: public flit::TestBase { public: zeroDivX(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 1; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { flit::getRandSeq()[0] }; - return ti; + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0] }; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { + virtual flit::Variant run_impl(const std::vector& ti) override { auto res = (T)0.0 / ti.vals[0]; return res; } @@ -412,447 +270,250 @@ class zeroDivX: public flit::TestBase { }; REGISTER_TYPE(zeroDivX) -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class xDivOne: public flit::TestBase { public: xDivOne(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 1; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { flit::getRandSeq()[0] }; - return ti; + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0] }; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto res = ti.vals[0] / (T)1.0; + virtual flit::Variant run_impl(const std::vector& ti) override { + T res = ti[0] / 
T(1.0); return res; } using flit::TestBase::id; }; REGISTER_TYPE(xDivOne) -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class xDivNegOne: public flit::TestBase { public: xDivNegOne(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 1; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { flit::getRandSeq()[0] }; - return ti; + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0] }; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto res = ti.vals[0] / (T)-1.0; + virtual flit::Variant run_impl(const std::vector& ti) override { + T res = ti[0] / T(-1.0); return res; } using flit::TestBase::id; }; REGISTER_TYPE(xDivNegOne) -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class negAdivB: public flit::TestBase { public: negAdivB(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 2; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0], flit::getRandSeq()[1], }; - return ti; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto res = -(ti.vals[0] / ti.vals[1]); + virtual flit::Variant run_impl(const std::vector& ti) override { + auto res = -(ti[0] / ti[1]); 
return res; } using flit::TestBase::id; }; REGISTER_TYPE(negAdivB) -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - -// template -// class twiceCast: public flit::TestBase { -// public: -// twiceCast(std::string id) : flit::TestBase(std::move(id)){} - -// virtual size_t getInputsPerRun() { return 1; } -// virtual flit::TestInput getDefaultInput(){ -// flit::TestInput ti; -// ti.vals = { flit::getRandSeq()[0] }; -// return ti; -// } -// protected: -// virtual flit::KernelFunction* getKernel() { return nullptr; } -// -// virtual flit::Variant run_impl(const flit::TestInput& ti) { -// //yes, this is ugly. ti.vals s/b vector of floats -// auto res = (T)((std::result_of<::get_next_type(T)>::type)ti.vals[0]); -// return res; -// } -// using flit::TestBase::id; -// }; -// REGISTER_TYPE(twiceCast) - - -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class negAminB: public flit::TestBase { public: negAminB(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 2; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0], flit::getRandSeq()[1], }; - return ti; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto res = -(ti.vals[0] - ti.vals[1]); + virtual flit::Variant run_impl(const std::vector& ti) override { + auto res = -(ti[0] - ti[1]); return res; } using flit::TestBase::id; }; REGISTER_TYPE(negAminB) - 
-// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class xMinusX: public flit::TestBase { public: xMinusX(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 1; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { flit::getRandSeq()[0] }; - return ti; + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0] }; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto res = ti.vals[0] - ti.vals[0]; + virtual flit::Variant run_impl(const std::vector& ti) override { + auto res = ti[0] - ti[0]; return res; } using flit::TestBase::id; }; REGISTER_TYPE(xMinusX) - -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class negAplusB: public flit::TestBase { public: negAplusB(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 2; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0], flit::getRandSeq()[1], }; - return ti; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto res = -(ti.vals[0] + ti.vals[1]); + virtual flit::Variant run_impl(const std::vector& ti) override { + auto res = -(ti[0] + ti[1]); return res; } using flit::TestBase::id; }; REGISTER_TYPE(negAplusB) - -// 
template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class aXbDivC: public flit::TestBase { public: aXbDivC(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 3; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0], flit::getRandSeq()[1], flit::getRandSeq()[2], }; - return ti; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto res = ti.vals[0] * (ti.vals[1] / ti.vals[2]); + virtual flit::Variant run_impl(const std::vector& ti) override { + auto res = ti[0] * (ti[1] / ti[2]); return res; } using flit::TestBase::id; }; REGISTER_TYPE(aXbDivC) - -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class aXbXc: public flit::TestBase { public: aXbXc(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 3; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0], flit::getRandSeq()[1], flit::getRandSeq()[2], }; - return ti; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto res = ti.vals[0] * (ti.vals[1] * ti.vals[2]); + virtual flit::Variant run_impl(const std::vector& ti) override { + auto res = ti[0] * (ti[1] * ti[2]); 
return res; } using flit::TestBase::id; }; REGISTER_TYPE(aXbXc) - -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class aPbPc: public flit::TestBase { public: aPbPc(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 3; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0], flit::getRandSeq()[1], flit::getRandSeq()[2], }; - return ti; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - auto res = ti.vals[0] + (ti.vals[1] + ti.vals[2]); + virtual flit::Variant run_impl(const std::vector& ti) override { + auto res = ti[0] + (ti[1] + ti[2]); return res; } using flit::TestBase::id; }; REGISTER_TYPE(aPbPc) - -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class xPc1EqC2: public flit::TestBase { public: xPc1EqC2(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 3; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0], flit::get_tiny1(), flit::get_tiny2(), }; - return ti; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - bool res = ti.vals[0] + ti.vals[1] == ti.vals[2]; + virtual flit::Variant run_impl(const 
std::vector& ti) override { + bool res = ti[0] + ti[1] == ti[2]; return res ? 1.0 : 0.0; } using flit::TestBase::id; }; REGISTER_TYPE(xPc1EqC2) - -// template -// GLOBAL -// void -// FtoDecToFKern(const flit::CuTestInput* tiList, double* results){ -// #ifdef __CUDA__ -// auto idx = blockIdx.x * blockDim.x + threadIdx.x; -// #else -// auto idx = 0; -// #endif -// results[idx] = 0.0; -// } - template class xPc1NeqC2: public flit::TestBase { public: xPc1NeqC2(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 3; } - virtual flit::TestInput getDefaultInput() override { - flit::TestInput ti; - ti.vals = { + virtual std::vector getDefaultInput() override { + return { flit::getRandSeq()[0], flit::get_tiny1(), flit::get_tiny2(), }; - return ti; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } - virtual flit::Variant run_impl(const flit::TestInput& ti) override { - bool res = ti.vals[0] + ti.vals[1] != ti.vals[2]; + virtual flit::Variant run_impl(const std::vector& ti) override { + bool res = (ti[0] + ti[1] != ti[2]); return res ? 1.0 : 0.0; } using flit::TestBase::id; diff --git a/src/TestBase.h b/src/TestBase.h index c8ef00e7..a5943cb3 100644 --- a/src/TestBase.h +++ b/src/TestBase.h @@ -71,63 +71,14 @@ struct TestResult { std::ostream& operator<<(std::ostream& os, const TestResult& res); -template -struct TestInput { - size_t iters; - size_t highestDim; - size_t ulp_inc; - T min; - T max; - std::vector vals; -}; - -/** A simple structure used in CUDA tests. - * - * It stores some values and a pointer, but does not do dynamic allocation nor - * deallocation. The struct does not own the vals pointer at all, just holds - * its value. - * - * The vals are intended to be read-only, because they are inputs. 
- */ -template -struct CuTestInput { - size_t iters = 100; - size_t highestDim = 0; - size_t ulp_inc = 1; - T min = -6; - T max = 6; - const T* vals = nullptr; // values with size length - size_t length = 0; - - /** Creates a CuTestInput object containing the same info as the TestInput - * - * This is in a separate function instead of constructor to still allow - * initializer lists to construct - * - * Note, the vals pointer will point to the internal data from the TestInput - * object. It is unsafe for the CuTestInput object to outlive this TestInput - * object unless you set a new value for vals. - */ - static CuTestInput fromTestInput(const TestInput& ti) { - CuTestInput cti {}; - cti.iters = ti.iters; - cti.highestDim = ti.highestDim; - cti.ulp_inc = ti.ulp_inc; - cti.min = ti.min; - cti.max = ti.max; - cti.vals = ti.vals.data(); - cti.length = ti.vals.size(); - return cti; - } -}; - /** Definition of a kernel function used by CUDA tests * - * @param arr: array of test input objects, already allocated and populated + * @param arr: array of input arrays, already allocated and populated + * @param n: length of the second dimension of arr * @param results: array where to store results, already allocated */ template -using KernelFunction = void (const CuTestInput*, double*); +using KernelFunction = void (const T* const*, size_t, double*); template using CudaDeleter = void (T*); @@ -182,37 +133,25 @@ class TestDisabledError : public std::runtime_error { */ template std::vector -runKernel(KernelFunction* kernel, const TestInput& ti, size_t stride) { +runKernel(KernelFunction* kernel, const std::vector& ti, size_t stride) { #ifdef __CUDA__ size_t runCount; if (stride < 1) { // the test takes no inputs runCount = 1; } else { - runCount = ti.vals.size() / stride; + runCount = ti.size() / stride; } - std::unique_ptr[]> ctiList(new CuTestInput[runCount]); - for (size_t i = 0; i < runCount; i++) { - ctiList[i] = CuTestInput::fromTestInput(ti); - // just point to a place 
in the array, like a slice - ctiList[i].vals = ti.vals.data() + i * stride; - ctiList[i].length = stride; - } std::unique_ptr cuResults(new double[runCount]); // Note: __CPUKERNEL__ mode is broken by the change to run the kernel in // multithreaded mode. Its compilation is broken. // TODO: fix __CPUKERNEL__ mode for testing. # ifdef __CPUKERNEL__ - kernel(ctiList, cuResults); + kernel(ti.data(), stride, cuResults); # else // not __CPUKERNEL__ - auto deviceVals = makeCudaArr(ti.vals.data(), ti.vals.size()); - // Reset the pointer value to device addresses - for (size_t i = 0; i < runCount; i++) { - ctiList[i].vals = deviceVals.get() + i * stride; - } - auto deviceInput = makeCudaArr(ctiList.get(), runCount); + auto deviceVals = makeCudaArr(ti.data(), ti.size()); auto deviceResult = makeCudaArr(nullptr, runCount); - kernel<<>>(deviceInput.get(), deviceResult.get()); + kernel<<>>(deviceVals.get(), stride, deviceResult.get()); auto resultSize = sizeof(double) * runCount; checkCudaErrors(cudaMemcpy(cuResults.get(), deviceResult.get(), resultSize, cudaMemcpyDeviceToHost)); @@ -241,11 +180,11 @@ class TestBase { * * @note This method is virtual, so it can be overridden by the test class if * the test is such that it needs to change the notion of running the test - * from only a TestInput object for each result pair. + * from only a std::vector of inputs for each result pair. 
* * @see getInputsPerRun */ - virtual std::vector run(const TestInput& ti, + virtual std::vector run(const std::vector& ti, const std::string &filebase, const bool shouldTime, const int timingLoops, @@ -255,31 +194,27 @@ class TestBase { using std::chrono::duration; using std::chrono::duration_cast; std::vector results; - TestInput emptyInput { - ti.iters, ti.highestDim, ti.ulp_inc, ti.min, ti.max, {} - }; + std::vector emptyInput; auto stride = getInputsPerRun(); - std::vector> inputSequence; + std::vector> inputSequence; if (stride < 1) { // the test does not take any inputs inputSequence.push_back(ti); } else { // Split up the input. One for each run - auto begin = ti.vals.begin(); - auto runCount = ti.vals.size() / stride; + auto begin = ti.begin(); + auto runCount = ti.size() / stride; for (decltype(runCount) i = 0; i < runCount; i++) { auto end = begin + stride; - TestInput testRunInput = emptyInput; - testRunInput.vals = std::vector(begin, end); - inputSequence.push_back(testRunInput); + inputSequence.emplace_back(std::vector(begin, end)); begin = end; } } // By default, the function to be timed is run_impl - std::function&)> runner; + std::function&)> runner; int runcount = 0; - runner = [this,&runcount] (const TestInput& runInput) { + runner = [this,&runcount] (const std::vector& runInput) { runcount++; return this->run_impl(runInput); }; @@ -287,7 +222,7 @@ class TestBase { // Use the cuda kernel if it is available by replacing runner auto kernel = getKernel(); if (kernel != nullptr) { - runner = [kernel, stride, &runcount] (const TestInput& ti) { + runner = [kernel, stride, &runcount] (const std::vector& ti) { // TODO: implement this timer better. 
runcount++; auto scorelist = runKernel(kernel, ti, stride); @@ -386,7 +321,7 @@ class TestBase { * in the following way: * test->run(test->getDefaultInput()); */ - virtual TestInput getDefaultInput() = 0; + virtual std::vector getDefaultInput() = 0; /** The number of inputs per test run * @@ -459,7 +394,7 @@ class TestBase { * The returned value (whichever type is chosen) will be used by the public * virtual compare() method. */ - virtual Variant run_impl(const TestInput& ti) = 0; + virtual Variant run_impl(const std::vector& ti) = 0; protected: const std::string id; @@ -470,13 +405,13 @@ template class NullTest : public TestBase { public: NullTest(std::string id) : TestBase(std::move(id)) {} - virtual TestInput getDefaultInput() override { return {}; } + virtual std::vector getDefaultInput() override { return {}; } virtual size_t getInputsPerRun() override { return 0; } virtual std::vector run( - const TestInput&, const bool, const size_t) override { return {}; } + const std::vector&, const bool, const size_t) override { return {}; } protected: virtual KernelFunction* getKernel() override { return nullptr; } - virtual Variant run_impl(const TestInput&) override { return {}; } + virtual Variant run_impl(const std::vector&) override { return {}; } }; class TestFactory { From 3df07ad106c973f84312afcd624f14ddeeb008ca Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Thu, 11 Jan 2018 23:47:57 -0700 Subject: [PATCH 02/20] Fix strict-aliasing problem with type-punning --- src/flitHelpers.h | 79 +++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 41 deletions(-) diff --git a/src/flitHelpers.h b/src/flitHelpers.h index 6502164d..fc9e9ca4 100644 --- a/src/flitHelpers.h +++ b/src/flitHelpers.h @@ -23,55 +23,27 @@ #define FLIT_UNUSED(x) (void)x #endif -// #ifdef __CUDA__ -// #define HOST_DEVICE __host__ __device__ -// #else -// #define HOST_DEVICE -// #endif - - namespace flit { const int RAND_SEED = 1; const int RAND_VECT_SIZE = 256; -inline 
-float -get_next_type(long double x){ - FLIT_UNUSED(x); - return 0.0f; -} - -inline -double -get_next_type(float x){ - FLIT_UNUSED(x); - return 0.0; -} - -inline -long double -get_next_type(double x){ - FLIT_UNUSED(x); - return 0.0l; -} - - extern thread_local InfoStream info_stream; - //this section provides a pregenerated random - //sequence that can be used by tests, including - //CUDA + +// this section provides a pregenerated random +// sequence that can be used by tests, including +// CUDA template const std::vector setRandSeq(size_t size, int32_t seed = RAND_SEED){ - //there may be a bug with float uniform_real_dist - //it is giving very different results than double or long double + // there may be a bug with float uniform_real_dist + // it is giving very different results than double or long double std::vector ret(size); std::mt19937 gen; gen.seed(seed); std::uniform_real_distribution dist(-6.0, 6.0); - for(auto& i: ret) i = (T)dist(gen); + for(auto& i: ret) i = T(dist(gen)); return ret; } @@ -91,42 +63,67 @@ getRandSeq(); std::ostream& operator<<(std::ostream&, const unsigned __int128); unsigned __int128 stouint128(const std::string &str); +template +HOST_DEVICE +F as_float_impl(I val) { + static_assert(sizeof(F) == sizeof(I)); + union { + I i; + F f; + } u = { val }; + return u.f; +} + HOST_DEVICE inline float as_float(uint32_t val) { - return *reinterpret_cast(&val); + return as_float_impl(val); } HOST_DEVICE inline double as_float(uint64_t val) { - return *reinterpret_cast(&val); + return as_float_impl(val); } inline long double as_float(unsigned __int128 val) { - return *reinterpret_cast(&val); + return as_float_impl(val); +} + +template +HOST_DEVICE +I as_int_impl(F val) { + static_assert(sizeof(F) == sizeof(I)); + union { + F f; + I i; + } u = { val }; + return u.i; } HOST_DEVICE inline uint32_t as_int(float val) { - return *reinterpret_cast(&val); + return as_int_impl(val); } HOST_DEVICE inline uint64_t as_int(double val) { - return 
*reinterpret_cast(&val); + return as_int_impl(val); } inline unsigned __int128 as_int(long double val) { const unsigned __int128 zero = 0; - const auto temp = *reinterpret_cast(&val); + const auto temp = as_int_impl(val); return temp & (~zero >> 48); } +// TODO: remove these - need to update a test +// TODO: add an eps() function? Is it already in the standard? + template T get_tiny1(){ From f0ae82e615ed1fd081ceac131ce74bb9921f7e64 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Thu, 11 Jan 2018 23:48:54 -0700 Subject: [PATCH 03/20] Add some tests for flitHelpers.h Need to implement more tests to make it complete --- tests/flit_src/tst_flitHelpers_h.cpp | 75 ++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 tests/flit_src/tst_flitHelpers_h.cpp diff --git a/tests/flit_src/tst_flitHelpers_h.cpp b/tests/flit_src/tst_flitHelpers_h.cpp new file mode 100644 index 00000000..317e6766 --- /dev/null +++ b/tests/flit_src/tst_flitHelpers_h.cpp @@ -0,0 +1,75 @@ +#include "test_harness.h" + +#include "flitHelpers.h" + +#include "TestBase.h" // for operator<<(flit::TestResult ...) 
+ +#include +#include +#include +#include + +#include + +template +void tst_setRandSeq() { + size_t n = 10; + int32_t seed = 5024; + auto expected = flit::setRandSeq(n, seed); + auto actual = flit::setRandSeq(n, seed); + for (decltype(n) i = 0; i < n; i++) { + TH_EQUAL(expected[i], actual[i]); + TH_VERIFY(expected[i] <= 6.0 && expected[i] >= -6.0); + } + + // Changing the seed should give you a different sequence + actual = flit::setRandSeq(n, seed + 2); + for (decltype(n) i = 0; i < n; i++) { + TH_NOT_EQUAL(expected[i], actual[i]); + } +} +void tst_setRandSeq_float() { tst_setRandSeq(); } +void tst_setRandSeq_double() { tst_setRandSeq(); } +void tst_setRandSeq_longdouble() { tst_setRandSeq(); } +TH_REGISTER(tst_setRandSeq_float); +TH_REGISTER(tst_setRandSeq_double); +TH_REGISTER(tst_setRandSeq_longdouble); + +void tst_as_float_32bit() { + uint32_t val = 1067316150; + float expected = 1.234; + TH_EQUAL(flit::as_float(val), expected); + + val = 0; + expected = 0.0; + TH_EQUAL(flit::as_float(val), expected); + + val = 1234; + expected = 1.7292e-42; + TH_EQUAL(flit::as_float(val), expected); +} +TH_REGISTER(tst_as_float_32bit); + +void tst_as_float_64bit() { + uint64_t val = 1067316150; + double expected = 5.27324243e-315; + TH_EQUAL(flit::as_float(val), expected); + + val = 0x3ff3be76c8b43958; + expected = 1.234; + + val = 0; + expected = 0.0; + TH_EQUAL(flit::as_float(val), expected); + + val = 1234; + expected = 6.097e-321; + TH_EQUAL(flit::as_float(val), expected); +} +TH_REGISTER(tst_as_float_64bit); + +// TODO: add tst_as_float_80bit() +// TODO: add tst_as_int_32bit() +// TODO: add tst_as_int_64bit() +// TODO: add tst_as_int_128bit() + From bb50c454ec6d46c6e4e5091a060fcddf97197a82 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Thu, 11 Jan 2018 23:49:34 -0700 Subject: [PATCH 04/20] Add one more missing assertion --- tests/flit_src/tst_flitHelpers_h.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/flit_src/tst_flitHelpers_h.cpp 
b/tests/flit_src/tst_flitHelpers_h.cpp index 317e6766..4561d58f 100644 --- a/tests/flit_src/tst_flitHelpers_h.cpp +++ b/tests/flit_src/tst_flitHelpers_h.cpp @@ -57,6 +57,7 @@ void tst_as_float_64bit() { val = 0x3ff3be76c8b43958; expected = 1.234; + TH_EQUAL(flit::as_float(val), expected); val = 0; expected = 0.0; From 23228e47c8135e50e9ebc82e8b8eec1dc297eec2 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 12 Jan 2018 08:09:01 -0700 Subject: [PATCH 05/20] Add some more tests --- src/flitHelpers.h | 4 ++-- tests/flit_src/tst_flitHelpers_h.cpp | 35 ++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/flitHelpers.h b/src/flitHelpers.h index fc9e9ca4..a9de75ae 100644 --- a/src/flitHelpers.h +++ b/src/flitHelpers.h @@ -66,7 +66,7 @@ unsigned __int128 stouint128(const std::string &str); template HOST_DEVICE F as_float_impl(I val) { - static_assert(sizeof(F) == sizeof(I)); + static_assert(sizeof(F) == sizeof(I), "cannot convert types of different sizes"); union { I i; F f; @@ -94,7 +94,7 @@ as_float(unsigned __int128 val) { template HOST_DEVICE I as_int_impl(F val) { - static_assert(sizeof(F) == sizeof(I)); + static_assert(sizeof(F) == sizeof(I), "cannot convert types of different sizes"); union { F f; I i; diff --git a/tests/flit_src/tst_flitHelpers_h.cpp b/tests/flit_src/tst_flitHelpers_h.cpp index 4561d58f..3beb2615 100644 --- a/tests/flit_src/tst_flitHelpers_h.cpp +++ b/tests/flit_src/tst_flitHelpers_h.cpp @@ -70,7 +70,38 @@ void tst_as_float_64bit() { TH_REGISTER(tst_as_float_64bit); // TODO: add tst_as_float_80bit() -// TODO: add tst_as_int_32bit() -// TODO: add tst_as_int_64bit() // TODO: add tst_as_int_128bit() +void tst_as_int_32bit() { + uint32_t expected = 1067316150; + float val= 1.234; + TH_EQUAL(flit::as_int(val), expected); + + expected = 0; + val = 0.0; + TH_EQUAL(flit::as_int(val), expected); + + expected = 1234; + val = 1.7292e-42; + TH_EQUAL(flit::as_int(val), expected); +} 
+TH_REGISTER(tst_as_int_32bit); + +void tst_as_int_64bit() { + uint64_t expected = 1067316150; + double val = 5.27324243e-315; + TH_EQUAL(flit::as_int(val), expected); + + expected = 0x3ff3be76c8b43958; + val = 1.234; + TH_EQUAL(flit::as_int(val), expected); + + expected = 0; + val = 0.0; + TH_EQUAL(flit::as_int(val), expected); + + expected = 1234; + val = 6.097e-321; + TH_EQUAL(flit::as_int(val), expected); +} +TH_REGISTER(tst_as_int_64bit); From 61cae210c590057a6da3a13c543095f356cda601 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 12 Jan 2018 16:47:15 -0700 Subject: [PATCH 06/20] Finish testing to_int and to_float --- tests/flit_src/tst_flitHelpers_h.cpp | 99 +++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/tests/flit_src/tst_flitHelpers_h.cpp b/tests/flit_src/tst_flitHelpers_h.cpp index 3beb2615..8b10bf27 100644 --- a/tests/flit_src/tst_flitHelpers_h.cpp +++ b/tests/flit_src/tst_flitHelpers_h.cpp @@ -10,6 +10,18 @@ #include #include +#include // for memcpy() + +namespace { + +unsigned __int128 combine_to_128(uint64_t left_half, uint64_t right_half) { + unsigned __int128 val = left_half; + val = val << 64; + val += right_half; + return val; +} + +} // end of unnamed namespace template void tst_setRandSeq() { @@ -69,7 +81,49 @@ void tst_as_float_64bit() { } TH_REGISTER(tst_as_float_64bit); -// TODO: add tst_as_float_80bit() +void tst_as_float_80bit() { + auto val = combine_to_128(0x0000, 0x0000000000000000); + long double expected = 0.0L; + TH_EQUAL(flit::as_float(val), expected); + + val = combine_to_128(0x3fff, 0x8000000000000000); + expected = 1.0L; + TH_EQUAL(flit::as_float(val), expected); + + val = combine_to_128(0x4000, 0x8000000000000000); + expected = 2.0L; + TH_EQUAL(flit::as_float(val), expected); + + val = combine_to_128(0x4000, 0xc000000000000000); + expected = 3.0L; + TH_EQUAL(flit::as_float(val), expected); + + val = combine_to_128(0x4001, 0x8000000000000000); + expected = 4.0L; + 
TH_EQUAL(flit::as_float(val), expected); + + val = combine_to_128(0x4001, 0xa000000000000000); + expected = 5.0L; + TH_EQUAL(flit::as_float(val), expected); + + val = combine_to_128(0x4001, 0xc000000000000000); + expected = 6.0L; + TH_EQUAL(flit::as_float(val), expected); + + val = combine_to_128(0x4001, 0xe000000000000000); + expected = 7.0L; + TH_EQUAL(flit::as_float(val), expected); + + val = combine_to_128(0x4002, 0x8000000000000000); + expected = 8.0L; + TH_EQUAL(flit::as_float(val), expected); + + val = combine_to_128(0x2b97, 0xaed3412a32403b66); + expected = 3.586714e-1573L; + TH_EQUAL(flit::as_float(val), expected); +} +TH_REGISTER(tst_as_float_80bit); + // TODO: add tst_as_int_128bit() void tst_as_int_32bit() { @@ -105,3 +159,46 @@ void tst_as_int_64bit() { TH_EQUAL(flit::as_int(val), expected); } TH_REGISTER(tst_as_int_64bit); + +void tst_as_int_128bit() { + auto expected = combine_to_128(0x0000, 0x0000000000000000); + long double val = 0.0L; + TH_EQUAL(flit::as_int(val), expected); + + expected = combine_to_128(0x3fff, 0x8000000000000000); + val = 1.0L; + TH_EQUAL(flit::as_int(val), expected); + + expected = combine_to_128(0x4000, 0x8000000000000000); + val = 2.0L; + TH_EQUAL(flit::as_int(val), expected); + + expected = combine_to_128(0x4000, 0xc000000000000000); + val = 3.0L; + TH_EQUAL(flit::as_int(val), expected); + + expected = combine_to_128(0x4001, 0x8000000000000000); + val = 4.0L; + TH_EQUAL(flit::as_int(val), expected); + + expected = combine_to_128(0x4001, 0xa000000000000000); + val = 5.0L; + TH_EQUAL(flit::as_int(val), expected); + + expected = combine_to_128(0x4001, 0xc000000000000000); + val = 6.0L; + TH_EQUAL(flit::as_int(val), expected); + + expected = combine_to_128(0x4001, 0xe000000000000000); + val = 7.0L; + TH_EQUAL(flit::as_int(val), expected); + + expected = combine_to_128(0x4002, 0x8000000000000000); + val = 8.0L; + TH_EQUAL(flit::as_int(val), expected); + + expected = combine_to_128(0x2b97, 0xaed3412a32403b66); + val = 
3.586714e-1573L;
+  TH_EQUAL(flit::as_int(val), expected);
+}
+TH_REGISTER(tst_as_int_128bit);

From b5c6af2337b762a59e4a99490f70bae448b82998 Mon Sep 17 00:00:00 2001
From: Michael Bentley
Date: Fri, 12 Jan 2018 16:52:43 -0700
Subject: [PATCH 07/20] move get_tiny1() and get_tiny2() to the one litmus
 test that uses it

---
 litmus-tests/tests/tinys.cpp | 47 +++++++++++++++++++++++++++++++++---
 src/flitHelpers.cpp | 38 -----------------------------
 src/flitHelpers.h | 13 -------------
 3 files changed, 43 insertions(+), 55 deletions(-)

diff --git a/litmus-tests/tests/tinys.cpp b/litmus-tests/tests/tinys.cpp
index 81d1b09d..ab0c2d8d 100644
--- a/litmus-tests/tests/tinys.cpp
+++ b/litmus-tests/tests/tinys.cpp
@@ -6,6 +6,45 @@
 #include
 #include

+namespace {
+
+  // Primary templates are intentionally uncallable; the condition must be
+  // made dependent on T (sizeof(T) == 0) because a bare static_assert(false)
+  // fires at template definition time, even when never instantiated.
+  template
+  T get_tiny1() { static_assert(sizeof(T) == 0, "Unimplemented type"); }
+
+  template
+  T get_tiny2() { static_assert(sizeof(T) == 0, "Unimplemented type"); }
+
+  template <>
+  float get_tiny1(){
+    return 1.175494351e-38;
+  }
+
+  template <>
+  double get_tiny1(){
+    return 2.2250738585072014e-308;
+  }
+
+  template <>
+  long double get_tiny1(){
+    return 3.362103143112093506262e-4931L;
+  }
+
+  template <>
+  float get_tiny2(){
+    return 1.175494352e-38;
+  }
+
+  template <>
+  double get_tiny2(){
+    return 2.2250738585072015e-308;
+  }
+
+  template <>
+  long double get_tiny2(){
+    return 3.362103143112093506263e-4931L;
+  }
+
+} // end of unnamed namespace

 template
 class FtoDecToF: public flit::TestBase {
@@ -481,8 +520,8 @@ class xPc1EqC2: public flit::TestBase {
   virtual std::vector getDefaultInput() override {
     return {
       flit::getRandSeq()[0],
-      flit::get_tiny1(),
-      flit::get_tiny2(),
+      get_tiny1(),
+      get_tiny2(),
     };
   }
 protected:
@@ -505,8 +544,8 @@ class xPc1NeqC2: public flit::TestBase {
   virtual std::vector getDefaultInput() override {
     return {
       flit::getRandSeq()[0],
-      flit::get_tiny1(),
-      flit::get_tiny2(),
+      get_tiny1(),
+      get_tiny2(),
     };
   }
 protected:
diff --git a/src/flitHelpers.cpp b/src/flitHelpers.cpp
index e5df1d97..b59fef46 100644
--- a/src/flitHelpers.cpp +++ b/src/flitHelpers.cpp @@ -17,44 +17,6 @@ getShuffleSeq(uint_fast32_t size){ return retVal; } -template <> -float -get_tiny1(){ - return 1.175494351-38; -} - -template <> -double -get_tiny1(){ - return 2.2250738585072014e-308; -} - -template <> -long double -get_tiny1(){ - return 3.362103143112093506262e-4931L; -} - -template <> -float -get_tiny2(){ - return 1.175494352-38; -} - -template <> -double -get_tiny2(){ - return 2.2250738585072015e-308; -} - -template <> -long double -get_tiny2(){ - return 3.362103143112093506263e-4931L; -} - - //const std::vector float_rands = setRandSequence(RAND_VECT_SIZE); - template<> const std::vector& getRandSeq(){return float_rands;} diff --git a/src/flitHelpers.h b/src/flitHelpers.h index a9de75ae..26b356e5 100644 --- a/src/flitHelpers.h +++ b/src/flitHelpers.h @@ -121,21 +121,8 @@ as_int(long double val) { return temp & (~zero >> 48); } -// TODO: remove these - need to update a test // TODO: add an eps() function? Is it already in the standard? 
-template -T -get_tiny1(){ - return (T)0.0; -} - -template -T -get_tiny2(){ - return (T)0.0; -} - template class Matrix; From cb9d9ce8c31813c229b8628086431394a3772fc5 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 12 Jan 2018 17:05:39 -0700 Subject: [PATCH 08/20] Split out Vector and Matrix classes to their own files --- src/Matrix.h | 148 +++++++++++++++++++ src/Vector.h | 246 +++++++++++++++++++++++++++++++ src/flit.h | 2 + src/flitHelpers.h | 361 ---------------------------------------------- 4 files changed, 396 insertions(+), 361 deletions(-) create mode 100644 src/Matrix.h create mode 100644 src/Vector.h diff --git a/src/Matrix.h b/src/Matrix.h new file mode 100644 index 00000000..e20b3db3 --- /dev/null +++ b/src/Matrix.h @@ -0,0 +1,148 @@ +#ifndef FLIT_MATRIX_H +#define FLIT_MATRIX_H + +#include "Vector.h" + +#include // for std::initializer_list +#include // for std::cout +#include // for std::ostream +#include // for std::vector + +namespace flit { + +template +class Matrix { + std::vector> data; +public: + Matrix(unsigned rows, unsigned cols): + data(rows, std::vector(cols, 0)){} + Matrix(Matrix const &m):data(m.data){} + Matrix(std::initializer_list> l): + data(l.size(), std::vector(l.begin()->size())){ + int x = 0; int y = 0; + for(auto r: l){ + for(auto i: r){ + data[x][y] = i; + ++y; + } + ++x; y = 0; + } + } + + friend class Vector; + template + friend std::ostream& operator<<(std::ostream& os, Matrix const &a); + + + bool + operator==(Matrix const &rhs) const { + bool retVal = true; + for(uint x = 0; x < data.size(); ++x){ + for(uint y = 0; y < data[0].size(); ++y){ + if(data[x][y] != rhs.data[x][y]){ + info_stream << "in: " << __func__ << std::endl; + info_stream << "for x,y: " << x << ":" << y << std::endl; + info_stream << "this = " << data[x][y] << "; rhs = " << rhs.data[x][y] << std::endl; + retVal = false; + break; + } + } + } + return retVal; + } + + Matrix + operator*(T const &sca){ + Matrix retVal(data.size(), data[0].size()); + 
for(uint x = 0; x < data.size(); ++x){ + for(uint y =0; y < data[0].size(); ++y){ + retVal.data[x][y] = data[x][y] * sca; + } + } + return retVal; + } + + //precond: this.w = rhs.h, duh + Matrix + operator*(Matrix const &rhs){ + Matrix retVal(data.size(), rhs.data[0].size()); + for(uint bcol = 0; bcol < rhs.data[0].size(); ++bcol){ + for(uint x = 0; x < data.size(); ++x){ + for(uint y = 0; y < data[0].size(); ++y){ + retVal.data[x][bcol] += data[x][y] * rhs.data[y][bcol]; + } + } + } + return retVal; + } + + //precond: dim(v) == 3 + static + Matrix + SkewSymCrossProdM(Vector const &v){ + return Matrix( + {{0, -v[2], v[1]}, + {v[2], 0, -v[0]}, + {-v[1], v[0], 0}}); + } + + static + Matrix + Identity(size_t dims){ + Matrix retVal(dims, dims); + for(size_t x = 0; x < dims; ++x){ + for(size_t y =0; y < dims; ++y){ + if(x == y) retVal.data[x][y] = 1; + else retVal.data[x][y] = 0; + } + } + return retVal; + } + + Vector + operator*(Vector const &v) const { + Vector retVal(data.size()); + int resI = 0; + for(auto row: data){ + for(size_t i = 0; i < row.size(); ++i){ + retVal[resI] += row[i] * v[i]; + } + ++resI; + } + return retVal; + } + + Matrix + operator+(Matrix const&rhs) const{ + Matrix retVal(rhs); + int x = 0; int y = 0; + for(auto r: data){ + for(auto i: r){ + retVal.data[x][y] = i + rhs.data[x][y]; + ++y; + } + y = 0; ++x; + } + return retVal; + } + + void + print() const { + std::cout << *this; + } +}; + +template +std::ostream& operator<<(std::ostream& os, Matrix const &m){ + for(auto r: m.data){ + for(auto i: r){ + os << i << '\t'; + } + os << std::endl; + } + return os; +} + +} // end of namespace flit + +#endif // FLIT_MATRIX_H diff --git a/src/Vector.h b/src/Vector.h new file mode 100644 index 00000000..577ae791 --- /dev/null +++ b/src/Vector.h @@ -0,0 +1,246 @@ +#ifndef FLIT_VECTOR_H +#define FLIT_VECTOR_H + +#include // for std::generate +#include // for std::sqrt +#include // for std::abs +#include // for std::initializer_list +#include // for 
std::ostream +#include // for std::mt19937 +#include // for std::move +#include // for std::vector + +namespace flit { + +template +class Matrix; + +template +class Vector { + std::vector data; +public: + Vector():data(0){} + Vector(std::initializer_list l) : data(l) {} + Vector(size_t size):data(size, 0) {} + template + Vector(size_t size, U genFun):data(size, 0){ + std::generate(data.begin(), data.end(), genFun); + } + + // Copyable + Vector(const Vector &v):data(v.data) {} + Vector(const std::vector &data) : data(data) {} + Vector& operator=(const Vector &v) { data = v.data; return *this; } + Vector& operator=(const std::vector &data) { this->data = data; return *this; } + + // Movable + Vector(Vector &&v) : data(std::move(v.data)) {} + Vector(std::vector &&data) : data(std::move(data)) {} + Vector& operator=(Vector &&v) { data = std::move(v.data); return *this; } + Vector& operator=(std::vector &&data) { this->data = std::move(data); return *this;} + + size_t size() const { return data.size(); } + T& operator[](size_t indx){return data[indx];} + const T& operator[](size_t indx) const {return data[indx];} + + const std::vector& getData() { return data; } + + Vector + operator*(T const &rhs){ + Vector retVal(size()); + for(uint x = 0; x < size(); ++x){ + retVal[x] = data[x] * rhs; + } + return retVal; + } + + static + Vector + getRandomVector(size_t dim){ + auto copy = getRandSeq(); + copy.erase(copy.begin() + dim, copy.end()); + // We need to make a copy of the copy because the first copy is + // std::vector. We need std::vector. 
+ std::vector otherCopy(copy.begin(), copy.end()); + return Vector(std::move(otherCopy)); + } + + Vector + getUnitVector() const { + Vector retVal(*this); + return retVal * ((T)1.0 / (this->L2Norm())); + } + + bool + operator==(Vector const &b){ + bool retVal = true; + if(b.data.size() != this->data.size()) return false; + for(uint x = 0; x < size(); ++x){ + if(data[x] != b.data[x]){ + retVal = false; + break; + } + } + return retVal; + } + void + dumpDistanceMetrics(Vector const &b, std::ostream& os){ + os << "vector difference: \t" << ((*this) - b) << std::endl; + os << "l1distance: \t" << (*this).L1Distance(b) << std::endl; + } + + //creates a vector with randomly swapped elements, every other + //one negated. For odd sized vectors, the odd one out will + //be zero + //i.e. + // 1 2 3 4 + // may produce + // 2 -1 4 -3 + //or + // 1 2 3 4 5 + // may produce + // 3 4 -1 -2 0 + + Vector + genOrthoVector(){ + Vector retVal(size()); + std::vector seq(size()); + iota(seq.begin(), seq.end(), 0); //load with seq beg w 0 + + shuffle(seq.begin(), seq.end(), std::mt19937(RAND_SEED)); + //do pairwise swap + for(uint i = 0; i < size(); i += 2){ + retVal[seq[i]] = data[seq[i+1]]; + retVal[seq[i+1]] = -data[seq[i]]; + } + if(size() & 1) //odd + retVal[seq[size() - 1]] = 0; + return retVal; + } + + Vector + rotateAboutZ_3d(T rads){ + Matrix t = {{(T)cos(rads), (T)-sin(rads), 0}, + {(T)sin(rads), (T)cos(rads), 0}, + {0, 0, 1}}; + info_stream << "rotation matrix is: " << t << std::endl; + Vector tmp(*this); + tmp = t * tmp; + info_stream << "in rotateAboutZ, result is: " << tmp << std::endl; + return tmp; + } + + Vector + operator-(Vector const &rhs) const { + Vector retVal(size()); + for(uint x = 0; x < size(); ++x){ + retVal[x] = data[x] - rhs.data[x]; + } + return retVal; + } + + T + L1Distance(Vector const &rhs) const { + T distance = 0; + for(uint x = 0; x < size(); ++x){ + distance += std::abs(data[x] - rhs.data[x]); + } + return distance; + } + + //method to reduce vector 
(pre-sort) + +public: + template + friend std::ostream& operator<<(std::ostream& os, Vector const &a); + + //fun is lamda like: [&sum](T a){sum += a}; + //you provide the T sum, closure will capture + //not const because it may sort cont + //C is container type + + template + void + reduce(C &cont, F const &fun) const { + for_each(cont.begin(), cont.end(), fun); + } + + T + operator^(Vector const &rhs) const { + T sum = 0.0; + for(uint i = 0; i < size(); ++i){ + sum += data[i] * rhs.data[i]; + } + return sum; + } + + T + LInfNorm() const { + T retVal = 0; + for(auto e: data){ + T tmp = std::abs(e); + if( tmp > retVal) retVal = tmp; + } + return retVal; + } + + T LInfDistance(Vector const &rhs) const { + auto diff = operator-(rhs); + return diff.LInfNorm(); + } + + //L2 norm + T + L2Norm() const { + Vector squares(size()); + T retVal = 0; + std::vector prods(data); + reduce(prods, [&retVal](T e){retVal += e*e;}); + return std::sqrt(retVal); + } + + T + L2Distance(Vector const &rhs) const { + T retVal = 0; + auto diff = operator-(rhs); + reduce(diff.data, [&retVal](T e){retVal += e*e;}); + return std::sqrt(retVal); + } + + //cross product, only defined here in 3d + Vector + cross(Vector const &rhs) const { + Vector retVal(size()); + retVal.data[0] = data[1] * rhs.data[2] - rhs.data[1] * data[2]; + retVal.data[1] = rhs.data[0] * data[2] - data[0] * rhs.data[2]; + retVal.data[2] = data[0] * rhs.data[1] - rhs.data[0] * data[1]; + return retVal; + } + + Vector + operator*(Vector const &rhs) const { + Vector retVal(size()); + for(int x = 0; x < size(); ++x){ + retVal[x] = data[x] * rhs.data[x]; + } + return retVal; + } + + bool + isOrtho(Vector const &rhs){ + return operator^(rhs) == (T)0; + } + +}; // end of class Vector + +template +std::ostream& operator<<(std::ostream& os, Vector const &a){ + for(auto i: a.data){ + os << i << '\t'; + } + return os; +} + +} // end of namespace flit + +#endif // FLIT_VECTOR_H diff --git a/src/flit.h b/src/flit.h index 891436e2..7a42ffd9 
100644 --- a/src/flit.h +++ b/src/flit.h @@ -5,7 +5,9 @@ #define FLIT_H 0 #include "flitHelpers.h" +#include "Matrix.h" #include "TestBase.h" +#include "Vector.h" #ifdef __CUDA__ //#include diff --git a/src/flitHelpers.h b/src/flitHelpers.h index 26b356e5..db0ae074 100644 --- a/src/flitHelpers.h +++ b/src/flitHelpers.h @@ -123,367 +123,6 @@ as_int(long double val) { // TODO: add an eps() function? Is it already in the standard? -template -class Matrix; - -template -class Vector { - std::vector data; -public: - Vector():data(0){} - Vector(std::initializer_list l) : data(l) {} - Vector(size_t size):data(size, 0) {} - template - Vector(size_t size, U genFun):data(size, 0){ - std::generate(data.begin(), data.end(), genFun); - } - - // Copyable - Vector(const Vector &v):data(v.data) {} - Vector(const std::vector &data) : data(data) {} - Vector& operator=(const Vector &v) { data = v.data; return *this; } - Vector& operator=(const std::vector &data) { this->data = data; return *this; } - - // Movable - Vector(Vector &&v) : data(std::move(v.data)) {} - Vector(std::vector &&data) : data(std::move(data)) {} - Vector& operator=(Vector &&v) { data = std::move(v.data); return *this; } - Vector& operator=(std::vector &&data) { this->data = std::move(data); return *this;} - - size_t size() const { return data.size(); } - T& operator[](size_t indx){return data[indx];} - const T& operator[](size_t indx) const {return data[indx];} - - const std::vector& getData() { return data; } - - Vector - operator*(T const &rhs){ - Vector retVal(size()); - for(uint x = 0; x < size(); ++x){ - retVal[x] = data[x] * rhs; - } - return retVal; - } - - static - Vector - getRandomVector(size_t dim){ - auto copy = getRandSeq(); - copy.erase(copy.begin() + dim, copy.end()); - // We need to make a copy of the copy because the first copy is - // std::vector. We need std::vector. 
- std::vector otherCopy(copy.begin(), copy.end()); - return Vector(std::move(otherCopy)); - } - - Vector - getUnitVector() const { - Vector retVal(*this); - return retVal * ((T)1.0 / (this->L2Norm())); - } - - bool - operator==(Vector const &b){ - bool retVal = true; - if(b.data.size() != this->data.size()) return false; - for(uint x = 0; x < size(); ++x){ - if(data[x] != b.data[x]){ - retVal = false; - break; - } - } - return retVal; - } - void - dumpDistanceMetrics(Vector const &b, std::ostream& os){ - os << "vector difference: \t" << ((*this) - b) << std::endl; - os << "l1distance: \t" << (*this).L1Distance(b) << std::endl; - } - - //creates a vector with randomly swapped elements, every other - //one negated. For odd sized vectors, the odd one out will - //be zero - //i.e. - // 1 2 3 4 - // may produce - // 2 -1 4 -3 - //or - // 1 2 3 4 5 - // may produce - // 3 4 -1 -2 0 - - Vector - genOrthoVector(){ - Vector retVal(size()); - std::vector seq(size()); - iota(seq.begin(), seq.end(), 0); //load with seq beg w 0 - - shuffle(seq.begin(), seq.end(), std::mt19937(RAND_SEED)); - //do pairwise swap - for(uint i = 0; i < size(); i += 2){ - retVal[seq[i]] = data[seq[i+1]]; - retVal[seq[i+1]] = -data[seq[i]]; - } - if(size() & 1) //odd - retVal[seq[size() - 1]] = 0; - return retVal; - } - - Vector - rotateAboutZ_3d(T rads){ - Matrix t = {{(T)cos(rads), (T)-sin(rads), 0}, - {(T)sin(rads), (T)cos(rads), 0}, - {0, 0, 1}}; - info_stream << "rotation matrix is: " << t << std::endl; - Vector tmp(*this); - tmp = t * tmp; - info_stream << "in rotateAboutZ, result is: " << tmp << std::endl; - return tmp; - } - - Vector - operator-(Vector const &rhs) const { - Vector retVal(size()); - for(uint x = 0; x < size(); ++x){ - retVal[x] = data[x] - rhs.data[x]; - } - return retVal; - } - - T - L1Distance(Vector const &rhs) const { - T distance = 0; - for(uint x = 0; x < size(); ++x){ - distance += std::abs(data[x] - rhs.data[x]); - } - return distance; - } - - //method to reduce vector 
(pre-sort) - -public: - template - friend std::ostream& operator<<(std::ostream& os, Vector const &a); - - //fun is lamda like: [&sum](T a){sum += a}; - //you provide the T sum, closure will capture - //not const because it may sort cont - //C is container type - - template - void - reduce(C &cont, F const &fun) const { - for_each(cont.begin(), cont.end(), fun); - } - - T - operator^(Vector const &rhs) const { - T sum = 0.0; - for(uint i = 0; i < size(); ++i){ - sum += data[i] * rhs.data[i]; - } - return sum; - } - - T - LInfNorm() const { - T retVal = 0; - for(auto e: data){ - T tmp = std::abs(e); - if( tmp > retVal) retVal = tmp; - } - return retVal; - } - - T LInfDistance(Vector const &rhs) const { - auto diff = operator-(rhs); - return diff.LInfNorm(); - } - - //L2 norm - T - L2Norm() const { - Vector squares(size()); - T retVal = 0; - std::vector prods(data); - reduce(prods, [&retVal](T e){retVal += e*e;}); - return std::sqrt(retVal); - } - - T - L2Distance(Vector const &rhs) const { - T retVal = 0; - auto diff = operator-(rhs); - reduce(diff.data, [&retVal](T e){retVal += e*e;}); - return std::sqrt(retVal); - } - - //cross product, only defined here in 3d - Vector - cross(Vector const &rhs) const { - Vector retVal(size()); - retVal.data[0] = data[1] * rhs.data[2] - rhs.data[1] * data[2]; - retVal.data[1] = rhs.data[0] * data[2] - data[0] * rhs.data[2]; - retVal.data[2] = data[0] * rhs.data[1] - rhs.data[0] * data[1]; - return retVal; - } - - Vector - operator*(Vector const &rhs) const { - Vector retVal(size()); - for(int x = 0; x < size(); ++x){ - retVal[x] = data[x] * rhs.data[x]; - } - return retVal; - } - - bool - isOrtho(Vector const &rhs){ - return operator^(rhs) == (T)0; - } - -}; -template -std::ostream& operator<<(std::ostream& os, Vector const &a){ - for(auto i: a.data){ - os << i << '\t'; - } - return os; -} - -template -std::ostream& operator<<(std::ostream& os, Matrix const &m){ - for(auto r: m.data){ - for(auto i: r){ - os << i << '\t'; - } - os 
<< std::endl; - } - return os; -} - -template -class Matrix { - std::vector> data; -public: - Matrix(unsigned rows, unsigned cols): - data(rows, std::vector(cols, 0)){} - Matrix(Matrix const &m):data(m.data){} - Matrix(std::initializer_list> l): - data(l.size(), std::vector(l.begin()->size())){ - int x = 0; int y = 0; - for(auto r: l){ - for(auto i: r){ - data[x][y] = i; - ++y; - } - ++x; y = 0; - } - } - - friend class Vector; - template - friend std::ostream& operator<<(std::ostream& os, Matrix const &a); - - - bool - operator==(Matrix const &rhs) const { - bool retVal = true; - for(uint x = 0; x < data.size(); ++x){ - for(uint y = 0; y < data[0].size(); ++y){ - if(data[x][y] != rhs.data[x][y]){ - info_stream << "in: " << __func__ << std::endl; - info_stream << "for x,y: " << x << ":" << y << std::endl; - info_stream << "this = " << data[x][y] << "; rhs = " << rhs.data[x][y] << std::endl; - retVal = false; - break; - } - } - } - return retVal; - } - - Matrix - operator*(T const &sca){ - Matrix retVal(data.size(), data[0].size()); - for(uint x = 0; x < data.size(); ++x){ - for(uint y =0; y < data[0].size(); ++y){ - retVal.data[x][y] = data[x][y] * sca; - } - } - return retVal; - } - - //precond: this.w = rhs.h, duh - Matrix - operator*(Matrix const &rhs){ - Matrix retVal(data.size(), rhs.data[0].size()); - for(uint bcol = 0; bcol < rhs.data[0].size(); ++bcol){ - for(uint x = 0; x < data.size(); ++x){ - for(uint y = 0; y < data[0].size(); ++y){ - retVal.data[x][bcol] += data[x][y] * rhs.data[y][bcol]; - } - } - } - return retVal; - } - - //precond: dim(v) == 3 - static - Matrix - SkewSymCrossProdM(Vector const &v){ - return Matrix( - {{0, -v[2], v[1]}, - {v[2], 0, -v[0]}, - {-v[1], v[0], 0}}); - } - - static - Matrix - Identity(size_t dims){ - Matrix retVal(dims, dims); - for(size_t x = 0; x < dims; ++x){ - for(size_t y =0; y < dims; ++y){ - if(x == y) retVal.data[x][y] = 1; - else retVal.data[x][y] = 0; - } - } - return retVal; - } - - Vector - operator*(Vector 
const &v) const { - Vector retVal(data.size()); - int resI = 0; - for(auto row: data){ - for(size_t i = 0; i < row.size(); ++i){ - retVal[resI] += row[i] * v[i]; - } - ++resI; - } - return retVal; - } - - Matrix - operator+(Matrix const&rhs) const{ - Matrix retVal(rhs); - int x = 0; int y = 0; - for(auto r: data){ - for(auto i: r){ - retVal.data[x][y] = i + rhs.data[x][y]; - ++y; - } - y = 0; ++x; - } - return retVal; - } - - void - print() const { - std::cout << *this; - } -}; - } // end of namespace flit #endif // FLIT_HELPERS_HPP From 6e2562df16f7764a36e4c097e03917a022db108e Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 12 Jan 2018 17:15:51 -0700 Subject: [PATCH 09/20] Fix Empty.cpp test --- data/tests/Empty.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/data/tests/Empty.cpp b/data/tests/Empty.cpp index d0bc4b49..1467129a 100644 --- a/data/tests/Empty.cpp +++ b/data/tests/Empty.cpp @@ -4,14 +4,14 @@ template GLOBAL -void Empty_kernel(const T* const* tiList, double* results) { +void Empty_kernel(const T* const* tiList, size_t n, double* results) { #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - auto& ti = tiList[idx]; - results[idx] = ti.vals[0]; + auto& ti = tiList[idx*n]; + results[idx] = ti[0]; } /** An example test class to show how to make FLiT tests @@ -107,6 +107,7 @@ class Empty : public flit::TestBase { * implemented above. 
*/ virtual flit::Variant run_impl(const std::vector &ti) override { + FLIT_UNUSED(ti); return flit::Variant(); } From adc012bd2ae99d943e7dd006908d23054d4fc3cb Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 12 Jan 2018 17:32:42 -0700 Subject: [PATCH 10/20] fix misspelling --- scripts/flitcli/flit_make.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/flitcli/flit_make.py b/scripts/flitcli/flit_make.py index 2956409d..bfaf4fa5 100644 --- a/scripts/flitcli/flit_make.py +++ b/scripts/flitcli/flit_make.py @@ -31,7 +31,7 @@ def main(arguments, prog=sys.argv[0]): parser.add_argument('--exec-jobs', type=int, default=1, help=''' The number of parallel jobs to use for the call to - CNU make when performing the test executtion after + GNU make when performing the test executtion after the full compilation has finished. The default is to only run one test at a time in order to allow them to not conflict and to generate accurate From bd9ad5bb598c42f7cc7b0d8ba98e8250067ae7ac Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 12 Jan 2018 17:33:54 -0700 Subject: [PATCH 11/20] Fix four of the litmus tests for changes made --- data/tests/Empty.cpp | 2 +- litmus-tests/tests/DistributivityOfMultiplication.cpp | 9 +++++---- litmus-tests/tests/DoHariGSBasic.cpp | 5 +++-- litmus-tests/tests/DoHariGSImproved.cpp | 4 ++-- litmus-tests/tests/DoMatrixMultSanity.cpp | 8 ++++---- 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/data/tests/Empty.cpp b/data/tests/Empty.cpp index 1467129a..b0092190 100644 --- a/data/tests/Empty.cpp +++ b/data/tests/Empty.cpp @@ -10,7 +10,7 @@ void Empty_kernel(const T* const* tiList, size_t n, double* results) { #else auto idx = 0; #endif - auto& ti = tiList[idx*n]; + const T* ti = tiList[idx*n]; results[idx] = ti[0]; } diff --git a/litmus-tests/tests/DistributivityOfMultiplication.cpp b/litmus-tests/tests/DistributivityOfMultiplication.cpp index b578ac4e..69ee54c6 100644 --- 
a/litmus-tests/tests/DistributivityOfMultiplication.cpp +++ b/litmus-tests/tests/DistributivityOfMultiplication.cpp @@ -13,15 +13,16 @@ template GLOBAL void -DistOfMultKernel(const T* const* tiList, double* results){ +DistOfMultKernel(const T* const* tiList, size_t n, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - T a = tiList[idx][0]; - T b = tiList[idx][1]; - T c = tiList[idx][2]; + const T* ti = tiList[idx*n]; + T a = ti[0]; + T b = ti[1]; + T c = ti[2]; auto distributed = (a * c) + (b * c); results[idx] = distributed; diff --git a/litmus-tests/tests/DoHariGSBasic.cpp b/litmus-tests/tests/DoHariGSBasic.cpp index 9ed54bff..2c0c003a 100644 --- a/litmus-tests/tests/DoHariGSBasic.cpp +++ b/litmus-tests/tests/DoHariGSBasic.cpp @@ -7,14 +7,15 @@ template GLOBAL void -DoHGSBTestKernel(const T* const* tiList, double* result){ +DoHGSBTestKernel(const T* const* tiList, size_t n, double* result){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - const T* vals = tiList[idx]; + const T* vals = tiList[idx*n]; + flit::VectorCU a(vals, 3); flit::VectorCU b(vals + 3, 3); flit::VectorCU c(vals + 6, 3); diff --git a/litmus-tests/tests/DoHariGSImproved.cpp b/litmus-tests/tests/DoHariGSImproved.cpp index 44b324ea..7bd30e31 100644 --- a/litmus-tests/tests/DoHariGSImproved.cpp +++ b/litmus-tests/tests/DoHariGSImproved.cpp @@ -6,13 +6,13 @@ template GLOBAL void -DoHGSITestKernel(const T* const* tiList, double* results){ +DoHGSITestKernel(const T* const* tiList, size_t n, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - const T* vals = tiList[idx]; + const T* vals = tiList[idx*n]; flit::VectorCU a(vals, 3); flit::VectorCU b(vals + 3, 3); flit::VectorCU c(vals + 6, 3); diff --git a/litmus-tests/tests/DoMatrixMultSanity.cpp b/litmus-tests/tests/DoMatrixMultSanity.cpp index 9eeda6e6..98d26e6b 100644 --- 
a/litmus-tests/tests/DoMatrixMultSanity.cpp +++ b/litmus-tests/tests/DoMatrixMultSanity.cpp @@ -9,15 +9,15 @@ template GLOBAL void -DoMatrixMultSanityKernel(const T* const* tiList, double* results){ +DoMatrixMultSanityKernel(const T* const* tiList, size_t n, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - auto ti = tiList[idx]; - auto b = flit::VectorCU(ti, ti.length); - auto c = flit::MatrixCU::Identity(ti.length) * b; + const T* ti = tiList[idx*n]; + auto b = flit::VectorCU(ti, n); + auto c = flit::MatrixCU::Identity(n) * b; results[idx] = c.L1Distance(b); } From d99c10b821bbfdf2c766ac22a464a8fa65749a33 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 26 Jan 2018 09:07:14 -0700 Subject: [PATCH 12/20] Add sleep time to touch test --- tests/flit_makefile/tst_incremental_build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/flit_makefile/tst_incremental_build.py b/tests/flit_makefile/tst_incremental_build.py index 46524ab2..01f75c49 100644 --- a/tests/flit_makefile/tst_incremental_build.py +++ b/tests/flit_makefile/tst_incremental_build.py @@ -33,7 +33,7 @@ ... th.touch(os.path.join(temp_dir, 'new_header.h')) ... after_modify = compile_dev(temp_dir) ... # touch the header file and make sure it recompiles again -... time.sleep(0.001) # give some time before touching again +... time.sleep(0.01) # give some time before touching again ... th.touch(os.path.join(temp_dir, 'new_header.h')) ... after_touch = compile_dev(temp_dir) Creating ... @@ -75,7 +75,7 @@ ... th.touch(os.path.join(temp_dir, 'new_header.h')) ... after_modify = compile_gt(temp_dir) ... # touch the header file and make sure it recompiles again -... time.sleep(0.001) # give some time before touching again +... time.sleep(0.01) # give some time before touching again ... th.touch(os.path.join(temp_dir, 'new_header.h')) ... after_touch = compile_gt(temp_dir) Creating ... 
From 46ecf95788f88696873e80084c19aaec8e479f78 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 26 Jan 2018 09:07:55 -0700 Subject: [PATCH 13/20] Split unused features of toml file out --- .../config/flit-default-future.toml.in | 79 +++++++++++++++++++ scripts/flitcli/config/flit-default.toml.in | 41 ++-------- 2 files changed, 86 insertions(+), 34 deletions(-) create mode 100644 scripts/flitcli/config/flit-default-future.toml.in diff --git a/scripts/flitcli/config/flit-default-future.toml.in b/scripts/flitcli/config/flit-default-future.toml.in new file mode 100644 index 00000000..a041c6d4 --- /dev/null +++ b/scripts/flitcli/config/flit-default-future.toml.in @@ -0,0 +1,79 @@ +# Autogenerated by "flit init" +# flit version {flit_version} + +[database] + +# older versions of flit supported postgres. that has been removed. only +# sqlite is supported at the moment. +type = 'sqlite' + +# if relative path, it is relative to the directory containing this +# configuration file. +filepath = 'results.sqlite' + +# For now, only one host is supported, all others are ignored +[[hosts]] + +# TODO: add documentation here for each element. + +name = '{hostname}' +flit_path = '{flit_path}' +config_dir = '{config_dir}' + +# The settings for "make dev" +[hosts.dev_build] +# compiler_name must be found in [[hosts.compilers]] list under name attribute +# but the optimization level and switches do not need to be in the compiler list +compiler_name = 'g++' +optimization_level = '-O2' +switches = '-funsafe-math-optimizations' + +# The ground truth compilation to use in analysis, for "make gt" +[hosts.ground_truth] +# compiler_name must be found in [[hosts.compilers]] list under name attribute +# but the optimization level and switches do not need to be in the compiler list +compiler_name = 'g++' +optimization_level = '-O0' +switches = '' + + # This host's list of compilers. + # For now, only used for hosts.ground_truth and hosts.dev_build. 
+ # TODO: use this list to generate the Makefile + [[hosts.compilers]] + + # TODO: figure out how to specify path for each host machine + # TODO: what if the compilers list is part of the host? + + # binary can be an absolute path, relative path, or binary name (found in + # PATH). If you want to specify a compiler in the same directory as this + # config file, prepend with a "./" (e.g. "./my-compiler") + binary = 'g++' + # TODO: this is not yet used... + # It is recommended to include version number in the name. This is how the + # compiler will be recognized in the results, and subsequently in the + # database and analysis. + name = 'g++' + # TODO: implement these supported types + # There are a few supported types: [ gcc, intel, clang, cuda ] + type = 'gcc' + optimization_levels = [ + '-O0', + '-O1', + '-O2', + '-O3', + #'-Ofast', + #'-O...' ? + ] + # Note: in some versions of python-toml, there is a parsing bug when a list + # has an empty string in the middle. So simply put it at the end without a + # comma. This has been fixed in the latest version of python-toml. + switches_list = [ + '-fassociative-math', + '-mavx', + '-mp1', + '-mavx2 -mfma', + '' + # ... + ] + + diff --git a/scripts/flitcli/config/flit-default.toml.in b/scripts/flitcli/config/flit-default.toml.in index f00194eb..c2b5e23d 100644 --- a/scripts/flitcli/config/flit-default.toml.in +++ b/scripts/flitcli/config/flit-default.toml.in @@ -11,10 +11,9 @@ type = 'sqlite' # configuration file. filepath = 'results.sqlite' +# For now, only one host is supported, all others are ignored [[hosts]] -# TODO: add documentation here for each element. 
- name = '{hostname}' flit_path = '{flit_path}' config_dir = '{config_dir}' @@ -27,48 +26,22 @@ compiler_name = 'g++' optimization_level = '-O2' switches = '-funsafe-math-optimizations' -# The ground truth compilation to use in analysis +# The ground truth compilation to use in analysis, for "make gt" [hosts.ground_truth] -# compiler needs to match the name field for one of this host's hosts.compilers -# The optimization level needs to be part of the list of optimization_levels -# and the switch needs to be a string of compiler flags from the switches list. -# TODO: make use of hosts.ground_truth +# compiler_name must be found in [[hosts.compilers]] list under name attribute +# but the optimization level and switches do not need to be in the compiler list compiler_name = 'g++' optimization_level = '-O0' switches = '' + # This host's list of compilers. + # For now, only used for hosts.ground_truth and hosts.dev_build. + # TODO: use this list to generate the Makefile [[hosts.compilers]] - # TODO: figure out how to specify path for each host machine - # TODO: what if the compilers list is part of the host? - # binary can be an absolute path, relative path, or binary name (found in # PATH). If you want to specify a compiler in the same directory as this # config file, prepend with a "./" (e.g. "./my-compiler") binary = 'g++' - # TODO: this is not yet used... - # It is recommended to include version number in the name. This is how the - # compiler will be recognized in the results, and subsequently in the - # database and analysis. name = 'g++' - # TODO: implement these supported types - # There are a few supported types: [ gcc, intel, clang, cuda ] - type = 'gcc' - optimization_levels = [ - '-O0', - '-O1', - '-O2', - '-O3', - #'-Ofast', - #'-O...' ? - ] - switches_list = [ - '', - '-fassociative-math', - '-mavx', - '-mp1', - '-mavx2 -mfma', - # ... 
- ] - From 2343ff3f3a75be50110719fcc4a6423d7ab5239e Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 26 Jan 2018 10:12:11 -0700 Subject: [PATCH 14/20] tinys.cpp: fix this litmus test to compile --- litmus-tests/tests/tinys.cpp | 64 ++++++++---------------------------- 1 file changed, 14 insertions(+), 50 deletions(-) diff --git a/litmus-tests/tests/tinys.cpp b/litmus-tests/tests/tinys.cpp index ab0c2d8d..993af63e 100644 --- a/litmus-tests/tests/tinys.cpp +++ b/litmus-tests/tests/tinys.cpp @@ -6,46 +6,6 @@ #include #include -namespace { - - template - T get_tiny1() { static_assert(false, "Unimplemented type"); } - - template - T get_tiny2() { static_assert(false, "Unimplemented type"); } - - template <> - float get_tiny1(){ - return 1.175494351-38; - } - - template <> - double get_tiny1(){ - return 2.2250738585072014e-308; - } - - template <> - long double get_tiny1(){ - return 3.362103143112093506262e-4931L; - } - - template <> - float get_tiny2(){ - return 1.175494352-38; - } - - template <> - double get_tiny2(){ - return 2.2250738585072015e-308; - } - - template <> - long double get_tiny2(){ - return 3.362103143112093506263e-4931L; - } - -} // end of unnamed namespace - template class FtoDecToF: public flit::TestBase { public: @@ -53,7 +13,7 @@ class FtoDecToF: public flit::TestBase { virtual size_t getInputsPerRun() override { return 1; } virtual std::vector getDefaultInput() override { - return {std::nextafter(T(0.0), T(1.0))}; + return { std::numeric_limits::min() }; } protected: @@ -64,12 +24,12 @@ class FtoDecToF: public flit::TestBase { // from https://en.wikipedia.org/wiki/IEEE_floating_point uint16_t ddigs = nlim.digits * std::log10(2) + 1; std::ostringstream res; - res << std::setprecision(ddigs) << ti.vals[0]; + res << std::setprecision(ddigs) << ti[0]; std::string dstr; dstr = res.str(); T backAgain; std::istringstream(dstr) >> backAgain; - return ti.vals[0] - backAgain; + return ti[0] - backAgain; } using flit::TestBase::id; @@ -83,13 +43,13 
@@ class subnormal: public flit::TestBase { virtual size_t getInputsPerRun() override { return 1; } virtual std::vector getDefaultInput() override { - return {std::nextafter(T(0.0), T(1.0))}; + return { std::numeric_limits::min() }; } protected: virtual flit::KernelFunction* getKernel() override { return nullptr; } virtual flit::Variant run_impl(const std::vector& ti) override { - return ti.vals[0] - ti.vals[0] / 2; + return ti[0] - ti[0] / 2; } using flit::TestBase::id; }; @@ -302,7 +262,7 @@ class zeroDivX: public flit::TestBase { virtual flit::KernelFunction* getKernel() override { return nullptr; } virtual flit::Variant run_impl(const std::vector& ti) override { - auto res = (T)0.0 / ti.vals[0]; + auto res = (T)0.0 / ti[0]; return res; } using flit::TestBase::id; @@ -518,10 +478,12 @@ class xPc1EqC2: public flit::TestBase { virtual size_t getInputsPerRun() override { return 3; } virtual std::vector getDefaultInput() override { + const T eps = std::numeric_limits::min(); + const T next = std::nextafter(eps, std::numeric_limits::infinity()); return { flit::getRandSeq()[0], - get_tiny1(), - get_tiny2(), + eps, + next, }; } protected: @@ -542,10 +504,12 @@ class xPc1NeqC2: public flit::TestBase { virtual size_t getInputsPerRun() override { return 3; } virtual std::vector getDefaultInput() override { + const T eps = std::numeric_limits::min(); + const T next = std::nextafter(eps, std::numeric_limits::infinity()); return { flit::getRandSeq()[0], - get_tiny1(), - get_tiny2(), + eps, + next, }; } protected: From 2e49dc5ed8d23dad14d1cf2e1dfc77019ec9bc3c Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 26 Jan 2018 10:46:09 -0700 Subject: [PATCH 15/20] Makefile.in: suppress comment for make clean --- data/Makefile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/Makefile.in b/data/Makefile.in index bcfd3736..62b79681 100644 --- a/data/Makefile.in +++ b/data/Makefile.in @@ -436,8 +436,8 @@ runbuild: $(TARGETS) $(CUTARGETS) groundtruth 
.PHONY: clean clean: - # Here we do it this way because we were running into the error of too many - # arguments given to rm. + @# Here we do it this way because we were running into the error of too many + @# arguments given to rm. $(foreach obj,$(OBJ_CLEAN),rm -f $(obj);) $(foreach obj,$(DEP_CLEAN),rm -f $(obj);) -rmdir $(OBJ_DIR) From d50febcc2ef317272ef6ea747e4423d7b3291c78 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 26 Jan 2018 10:46:44 -0700 Subject: [PATCH 16/20] Fix CUDA kernel and get all litmus tests to compile --- data/tests/Empty.cpp | 4 +- .../tests/DistributivityOfMultiplication.cpp | 4 +- litmus-tests/tests/DoHariGSBasic.cpp | 4 +- litmus-tests/tests/DoHariGSImproved.cpp | 4 +- litmus-tests/tests/DoMatrixMultSanity.cpp | 4 +- litmus-tests/tests/DoOrthoPerturbTest.cpp | 19 ++++---- litmus-tests/tests/DoSimpleRotate90.cpp | 6 +-- .../tests/DoSkewSymCPRotationTest.cpp | 5 ++- litmus-tests/tests/FMACancel.cpp | 4 +- litmus-tests/tests/RotateAndUnrotate.cpp | 6 +-- litmus-tests/tests/RotateFullCircle.cpp | 31 +++++++------ litmus-tests/tests/TrianglePHeron.cpp | 4 +- litmus-tests/tests/TrianglePSylv.cpp | 45 +++++++++++-------- src/TestBase.h | 6 +-- 14 files changed, 80 insertions(+), 66 deletions(-) diff --git a/data/tests/Empty.cpp b/data/tests/Empty.cpp index b0092190..6cb24f42 100644 --- a/data/tests/Empty.cpp +++ b/data/tests/Empty.cpp @@ -4,13 +4,13 @@ template GLOBAL -void Empty_kernel(const T* const* tiList, size_t n, double* results) { +void Empty_kernel(const T* tiList, size_t n, double* results) { #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - const T* ti = tiList[idx*n]; + const T* ti = tiList + (idx*n); results[idx] = ti[0]; } diff --git a/litmus-tests/tests/DistributivityOfMultiplication.cpp b/litmus-tests/tests/DistributivityOfMultiplication.cpp index 69ee54c6..fba84662 100644 --- a/litmus-tests/tests/DistributivityOfMultiplication.cpp +++ 
b/litmus-tests/tests/DistributivityOfMultiplication.cpp @@ -13,13 +13,13 @@ template GLOBAL void -DistOfMultKernel(const T* const* tiList, size_t n, double* results){ +DistOfMultKernel(const T* tiList, size_t n, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - const T* ti = tiList[idx*n]; + const T* ti = tiList + (idx*n); T a = ti[0]; T b = ti[1]; T c = ti[2]; diff --git a/litmus-tests/tests/DoHariGSBasic.cpp b/litmus-tests/tests/DoHariGSBasic.cpp index 2c0c003a..642bae45 100644 --- a/litmus-tests/tests/DoHariGSBasic.cpp +++ b/litmus-tests/tests/DoHariGSBasic.cpp @@ -7,14 +7,14 @@ template GLOBAL void -DoHGSBTestKernel(const T* const* tiList, size_t n, double* result){ +DoHGSBTestKernel(const T* tiList, size_t n, double* result){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - const T* vals = tiList[idx*n]; + const T* vals = tiList + (idx*n); flit::VectorCU a(vals, 3); flit::VectorCU b(vals + 3, 3); diff --git a/litmus-tests/tests/DoHariGSImproved.cpp b/litmus-tests/tests/DoHariGSImproved.cpp index 7bd30e31..ea3e24b1 100644 --- a/litmus-tests/tests/DoHariGSImproved.cpp +++ b/litmus-tests/tests/DoHariGSImproved.cpp @@ -6,13 +6,13 @@ template GLOBAL void -DoHGSITestKernel(const T* const* tiList, size_t n, double* results){ +DoHGSITestKernel(const T* tiList, size_t n, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - const T* vals = tiList[idx*n]; + const T* vals = tiList + (idx*n); flit::VectorCU a(vals, 3); flit::VectorCU b(vals + 3, 3); flit::VectorCU c(vals + 6, 3); diff --git a/litmus-tests/tests/DoMatrixMultSanity.cpp b/litmus-tests/tests/DoMatrixMultSanity.cpp index 98d26e6b..a16dba80 100644 --- a/litmus-tests/tests/DoMatrixMultSanity.cpp +++ b/litmus-tests/tests/DoMatrixMultSanity.cpp @@ -9,13 +9,13 @@ template GLOBAL void -DoMatrixMultSanityKernel(const T* const* tiList, size_t n, double* 
results){ +DoMatrixMultSanityKernel(const T* tiList, size_t n, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - const T* ti = tiList[idx*n]; + const T* ti = tiList + (idx*n); auto b = flit::VectorCU(ti, n); auto c = flit::MatrixCU::Identity(n) * b; results[idx] = c.L1Distance(b); diff --git a/litmus-tests/tests/DoOrthoPerturbTest.cpp b/litmus-tests/tests/DoOrthoPerturbTest.cpp index 4fe1d8af..32b83aec 100644 --- a/litmus-tests/tests/DoOrthoPerturbTest.cpp +++ b/litmus-tests/tests/DoOrthoPerturbTest.cpp @@ -13,28 +13,28 @@ namespace { template GLOBAL void -DoOPTKernel(const T* const* tiList, double* results){ +DoOPTKernel(const T* tiList, size_t n, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - auto ti = tiList[idx]; + const T* ti = tiList + (idx*n); double score = 0.0; - cuvector orthoCount(dim, 0.0); + cuvector orthoCount(n, 0.0); // we use a double literal above as a workaround for Intel 15-16 compiler // bug: // https://software.intel.com/en-us/forums/intel-c-compiler/topic/565143 - flit::VectorCU a(ti, ti.size()); + flit::VectorCU a(ti, n); flit::VectorCU b = a.genOrthoVector(); T backup; - for(decltype(dim) r = 0; r < dim; ++r){ + for(decltype(n) r = 0; r < n; ++r){ T &p = a[r]; backup = p; - for(decltype(iters) i = 0; i < iters; ++i){ + for(int i = 0; i < iters; ++i){ auto tmp = flit::as_int(p); p = flit::as_float(++tmp); //yeah, this isn't perfect //p = std::nextafter(p, std::numeric_limits::max()); @@ -64,8 +64,9 @@ class DoOrthoPerturbTest : public flit::TestBase { virtual size_t getInputsPerRun() override { return 16; } virtual std::vector getDefaultInput() override { auto dim = getInputsPerRun(); - ti = std::vector(dim); - for(decltype(dim) x = 0; x < dim; ++x) ti[x] = static_cast(1 << x); + std::vector ti(dim); + for(decltype(dim) x = 0; x < dim; ++x) + ti[x] = static_cast(1 << x); return ti; } @@ -89,7 +90,7 @@ class 
DoOrthoPerturbTest : public flit::TestBase { for(decltype(dim) r = 0; r < dim; ++r){ T &p = a[r]; backup = p; - for(decltype(iters) i = 0; i < iters; ++i){ + for(int i = 0; i < iters; ++i){ //cout << "r:" << r << ":i:" << i << std::std::endl; p = std::nextafter(p, std::numeric_limits::max()); diff --git a/litmus-tests/tests/DoSimpleRotate90.cpp b/litmus-tests/tests/DoSimpleRotate90.cpp index 86b96f6a..01951400 100644 --- a/litmus-tests/tests/DoSimpleRotate90.cpp +++ b/litmus-tests/tests/DoSimpleRotate90.cpp @@ -7,14 +7,14 @@ template GLOBAL void -DoSR90Kernel(const flit::CuTestInput* tiList, double* results){ +DoSR90Kernel(const T* tiList, size_t n, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - auto ti = tiList[idx]; - flit::VectorCU A(ti.vals, ti.length); + const T* ti = tiList + (idx*n); + flit::VectorCU A(ti, n); flit::VectorCU expected(A.size()); expected[0]=-A[1]; expected[1]=A[0]; expected[2]=A[2]; diff --git a/litmus-tests/tests/DoSkewSymCPRotationTest.cpp b/litmus-tests/tests/DoSkewSymCPRotationTest.cpp index 60f35f21..caef8f14 100644 --- a/litmus-tests/tests/DoSkewSymCPRotationTest.cpp +++ b/litmus-tests/tests/DoSkewSymCPRotationTest.cpp @@ -7,13 +7,13 @@ template GLOBAL void -DoSkewSCPRKernel(const flit::CuTestInput* tiList, double* results){ +DoSkewSCPRKernel(const T* tiList, size_t n, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - const T* vals = tiList[idx].vals; + const T* vals = tiList + (idx*n); auto A = flit::VectorCU(vals, 3).getUnitVector(); auto B = flit::VectorCU(vals + 3, 3).getUnitVector(); auto cross = A.cross(B); @@ -34,6 +34,7 @@ class DoSkewSymCPRotationTest: public flit::TestBase { virtual size_t getInputsPerRun() override { return 6; } virtual std::vector getDefaultInput() override { + auto n = getInputsPerRun(); return flit::Vector::getRandomVector(n).getData(); } diff --git 
a/litmus-tests/tests/FMACancel.cpp b/litmus-tests/tests/FMACancel.cpp index a43fd1ed..b175bc7f 100644 --- a/litmus-tests/tests/FMACancel.cpp +++ b/litmus-tests/tests/FMACancel.cpp @@ -17,8 +17,8 @@ class FMACancel : public flit::TestBase { protected: virtual flit::Variant run_impl(const std::vector& ti) override { - const T a = ti.vals[0]; - const T b = ti.vals[1]; + const T a = ti[0]; + const T b = ti[1]; const T c = a; const T d = -b; diff --git a/litmus-tests/tests/RotateAndUnrotate.cpp b/litmus-tests/tests/RotateAndUnrotate.cpp index 516d951f..9197fac8 100644 --- a/litmus-tests/tests/RotateAndUnrotate.cpp +++ b/litmus-tests/tests/RotateAndUnrotate.cpp @@ -8,15 +8,15 @@ template GLOBAL void -RaUKern(const flit::CuTestInput* tiList, double* results){ +RaUKern(const T* tiList, size_t n, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif auto theta = M_PI; - auto ti = tiList[idx]; - auto A = flit::VectorCU(ti.vals, ti.length); + const T* ti = tiList + (idx*n); + auto A = flit::VectorCU(ti, n); auto orig = A; A = A.rotateAboutZ_3d(theta); A = A.rotateAboutZ_3d(-theta); diff --git a/litmus-tests/tests/RotateFullCircle.cpp b/litmus-tests/tests/RotateFullCircle.cpp index 2bbf05e8..0c055bad 100644 --- a/litmus-tests/tests/RotateFullCircle.cpp +++ b/litmus-tests/tests/RotateFullCircle.cpp @@ -4,21 +4,24 @@ #include +namespace { + const int iters = 200; +} // end of unnamed namespace + template GLOBAL void -RFCKern(const flit::CuTestInput* tiList, double* results){ +RFCKern(const T* tiList, size_t n, double* results){ #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - auto ti = tiList[idx]; - auto n = ti.iters; - auto A = flit::VectorCU(ti.vals, ti.length); + const T* ti = tiList + (idx*n); + auto A = flit::VectorCU(ti, n); auto orig = A; - T theta = 2 * M_PI / n; - for(decltype(n) r = 0; r < n; ++r){ + T theta = 2 * M_PI / iters; + for(int r = 0; r < iters; ++r){ A = 
A.rotateAboutZ_3d(theta); } results[idx] = A.L1Distance(orig); @@ -30,7 +33,7 @@ class RotateFullCircle: public flit::TestBase { RotateFullCircle(std::string id) : flit::TestBase(std::move(id)){} virtual size_t getInputsPerRun() override { return 3; } - virtual std::vector getDefaultInput() override { + virtual std::vector getDefaultInput() override { auto n = getInputsPerRun(); return flit::Vector::getRandomVector(n).getData(); } @@ -39,20 +42,22 @@ class RotateFullCircle: public flit::TestBase { virtual flit::KernelFunction* getKernel() override {return RFCKern; } virtual flit::Variant run_impl(const std::vector& ti) override { - auto n = 200; flit::Vector A = flit::Vector(ti); auto orig = A; - T theta = 2 * M_PI / n; - flit::info_stream << "Rotate full circle in " << n << " increments, A is: " << A << std::endl; - for(decltype(n) r = 0; r < n; ++r){ + T theta = 2 * M_PI / iters; + flit::info_stream << "Rotate full circle in " << iters + << " increments, A is: " << A << std::endl; + for(int r = 0; r < iters; ++r){ A.rotateAboutZ_3d(theta); flit::info_stream << r << " rotations, vect = " << A << std::endl; } flit::info_stream << "Rotated is: " << A << std::endl; bool equal = A == orig; - flit::info_stream << "Does rotated vect == starting vect? " << equal << std::endl; + flit::info_stream << "Does rotated vect == starting vect? 
" << equal + << std::endl; if(!equal){ - flit::info_stream << "The (vector) difference is: " << (A - orig) << std::endl; + flit::info_stream << "The (vector) difference is: " << (A - orig) + << std::endl; } flit::info_stream << "in " << id << std::endl; A.dumpDistanceMetrics(orig, flit::info_stream); diff --git a/litmus-tests/tests/TrianglePHeron.cpp b/litmus-tests/tests/TrianglePHeron.cpp index 44beb18e..aa35ea00 100644 --- a/litmus-tests/tests/TrianglePHeron.cpp +++ b/litmus-tests/tests/TrianglePHeron.cpp @@ -28,13 +28,13 @@ T getArea(const T a, template GLOBAL void -TrianglePHKern(const T* const* tiList, size_t n, double* results) { +TrianglePHKern(const T* tiList, size_t n, double* results) { #ifdef __CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - T* start = tiList + idx * n; + auto start = tiList + (idx*n); T maxval = start[0]; T a = maxval; T b = maxval; diff --git a/litmus-tests/tests/TrianglePSylv.cpp b/litmus-tests/tests/TrianglePSylv.cpp index d614d202..33ff2c92 100644 --- a/litmus-tests/tests/TrianglePSylv.cpp +++ b/litmus-tests/tests/TrianglePSylv.cpp @@ -4,45 +4,53 @@ #include +namespace { + const int iters = 200; +} // end of unnamed namespace + template DEVICE -T getCArea(const T a, - const T b, - const T c){ - return (flit::cpow((T)2.0, (T)-2)*flit::csqrt((T)(a+(b+c))*(a+(b-c))*(c+(a-b))*(c-(a-b)))); +T getCArea(const T a, const T b, const T c) { + return flit::cpow(T(2.0), T(-2)) * + flit::csqrt(T((a + (b + c)) * + (a + (b - c)) * + (c + (a - b)) * + (c - (a - b)))); } template -T getArea(const T a, - const T b, - const T c){ - return (pow((T)2.0, -2)*sqrt((T)(a+(b+c))*(a+(b-c))*(c+(a-b))*(c-(a-b)))); +T getArea(const T a, const T b, const T c) { + return pow(T(2.0), T(-2)) * + sqrt(T((a + (b + c)) * + (a + (b - c)) * + (c + (a - b)) * + (c - (a - b)))); } template GLOBAL void -TrianglePSKern(const flit::CuTestInput* tiList, double* results){ +TrianglePSKern(const T* tiList, size_t n, double* results){ #ifdef 
__CUDA__ auto idx = blockIdx.x * blockDim.x + threadIdx.x; #else auto idx = 0; #endif - auto ti = tiList[idx]; - T maxval = tiList[idx].vals[0]; + const T* ti = tiList + (idx*n); + T maxval = ti[0]; T a = maxval; T b = maxval; - T c = maxval * flit::csqrt((T)2.0); - const T delta = maxval / (T)ti.iters; - const T checkVal = (T)0.5 * b * a; + T c = maxval * flit::csqrt(T(2.0)); + const T delta = maxval / T(iters); + const T checkVal = T(0.5) * b * a; double score = 0.0; for(T pos = 0; pos <= a; pos += delta){ - b = flit::csqrt(flit::cpow(pos, (T)2.0) + - flit::cpow(maxval, (T)2.0)); - c = flit::csqrt(flit::cpow(a - pos, (T)2.0) + - flit::cpow(maxval, (T)2.0)); + b = flit::csqrt(flit::cpow(pos, T(2.0)) + + flit::cpow(maxval, T(2.0))); + c = flit::csqrt(flit::cpow(a - pos, T(2.0)) + + flit::cpow(maxval, T(2.0))); auto crit = getCArea(a,b,c); score += std::abs(crit - checkVal); } @@ -64,7 +72,6 @@ class TrianglePSylv: public flit::TestBase { virtual flit::Variant run_impl(const std::vector& ti) override { T maxval = ti[0]; - auto iters = 200; // start as a right triangle T a = maxval; T b = maxval; diff --git a/src/TestBase.h b/src/TestBase.h index a5943cb3..e7511a77 100644 --- a/src/TestBase.h +++ b/src/TestBase.h @@ -73,12 +73,12 @@ std::ostream& operator<<(std::ostream& os, const TestResult& res); /** Definition of a kernel function used by CUDA tests * - * @param arr: array of input arrays, already allocated and populated - * @param n: length of the second dimension of arr + * @param arr: array of input arrays, flattened, already allocated and populated + * @param n: length of each input, it is the stride * @param results: array where to store results, already allocated */ template -using KernelFunction = void (const T* const*, size_t, double*); +using KernelFunction = void (const T*, size_t, double*); template using CudaDeleter = void (T*); From defee194890074cdf113788dc25851f96960f5b5 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 26 Jan 2018 10:59:20 
-0700 Subject: [PATCH 17/20] python tests now return error codes on failure --- tests/flit_cli/tst_version.py | 5 +++-- tests/flit_makefile/tst_empty_project.py | 5 +++-- tests/flit_makefile/tst_incremental_build.py | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/flit_cli/tst_version.py b/tests/flit_cli/tst_version.py index faf3d82c..a0da1076 100644 --- a/tests/flit_cli/tst_version.py +++ b/tests/flit_cli/tst_version.py @@ -51,5 +51,6 @@ sys.path = before_path if __name__ == '__main__': - import doctest - doctest.testmod() + from doctest import testmod + failures, tests = testmod() + sys.exit(failures) diff --git a/tests/flit_makefile/tst_empty_project.py b/tests/flit_makefile/tst_empty_project.py index 05267d89..d37f319f 100644 --- a/tests/flit_makefile/tst_empty_project.py +++ b/tests/flit_makefile/tst_empty_project.py @@ -80,5 +80,6 @@ sys.path = before_path if __name__ == '__main__': - import doctest - doctest.testmod() + from doctest import testmod + failures, tests = testmod() + sys.exit(failures) diff --git a/tests/flit_makefile/tst_incremental_build.py b/tests/flit_makefile/tst_incremental_build.py index 01f75c49..7d0de135 100644 --- a/tests/flit_makefile/tst_incremental_build.py +++ b/tests/flit_makefile/tst_incremental_build.py @@ -103,5 +103,6 @@ sys.path = before_path if __name__ == '__main__': - import doctest - doctest.testmod() + from doctest import testmod + failures, tests = testmod() + sys.exit(failures) From 66c5098c7c004688e4953c78f4e5f141fb381d3b Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 26 Jan 2018 11:09:13 -0700 Subject: [PATCH 18/20] Remove finished TODO statements --- src/flitHelpers.h | 2 -- tests/flit_src/tst_flitHelpers_h.cpp | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/flitHelpers.h b/src/flitHelpers.h index db0ae074..6d0e1313 100644 --- a/src/flitHelpers.h +++ b/src/flitHelpers.h @@ -121,8 +121,6 @@ as_int(long double val) { return temp & (~zero >> 48); } -// TODO: add an eps() 
function? Is it already in the standard? - } // end of namespace flit #endif // FLIT_HELPERS_HPP diff --git a/tests/flit_src/tst_flitHelpers_h.cpp b/tests/flit_src/tst_flitHelpers_h.cpp index 8b10bf27..225408d7 100644 --- a/tests/flit_src/tst_flitHelpers_h.cpp +++ b/tests/flit_src/tst_flitHelpers_h.cpp @@ -124,8 +124,6 @@ void tst_as_float_80bit() { } TH_REGISTER(tst_as_float_80bit); -// TODO: add tst_as_int_128bit() - void tst_as_int_32bit() { uint32_t expected = 1067316150; float val= 1.234; From 1a0a6ec037cf2dca28dd0e8117af9849a254e521 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 26 Jan 2018 11:19:59 -0700 Subject: [PATCH 19/20] Split out MatrixCU and VectorCU --- src/CUHelpers.h | 351 +-------------------------------- src/MatrixCU.h | 152 ++++++++++++++ src/Vector.h | 2 + src/VectorCU.h | 219 ++++++++++++++++++++ src/{CUVector.h => cuvector.h} | 48 +---- src/flit.h | 4 +- 6 files changed, 380 insertions(+), 396 deletions(-) create mode 100644 src/MatrixCU.h create mode 100644 src/VectorCU.h rename src/{CUVector.h => cuvector.h} (78%) diff --git a/src/CUHelpers.h b/src/CUHelpers.h index febd37c5..dda24434 100644 --- a/src/CUHelpers.h +++ b/src/CUHelpers.h @@ -15,7 +15,7 @@ #define GLOBAL __global__ #endif #include "flitHelpers.h" -#include "CUVector.h" +#include "cuvector.h" #include @@ -142,355 +142,6 @@ initDeviceData() { #endif // defined(__CUDA__) && !defined(__CPUKERNEL__) } -template -class MatrixCU; - -template -class VectorCU { - cuvector data; - friend class MatrixCU; -public: - using vsize_t = typename cuvector::cvs_t; - - HOST_DEVICE - explicit - VectorCU(vsize_t dim) : data(dim) {} - HOST VectorCU(std::initializer_list l) : data(l) {} - HOST_DEVICE VectorCU(const T* array, vsize_t size) : data(array, size) {} - - // copy support - HOST_DEVICE VectorCU(const VectorCU& rhs):data(rhs.data){} - HOST_DEVICE VectorCU(const cuvector& vals):data(vals){} - HOST_DEVICE VectorCU& operator=(const VectorCU& rhs) { data = rhs.data; return *this; } - 
HOST_DEVICE VectorCU& operator=(const cuvector& vals) { data = vals; return *this; } - - // move support - HOST_DEVICE VectorCU(VectorCU&& rhs):data(std::move(rhs.data)){} - HOST_DEVICE VectorCU(cuvector&& vals):data(std::move(vals)){} - HOST_DEVICE VectorCU& operator=(VectorCU&& rhs) { data = std::move(rhs.data); return *this; } - HOST_DEVICE VectorCU& operator=(cuvector&& vals) { data = std::move(vals); return *this; } - - HOST_DEVICE - T& - operator[](vsize_t index){ - return data[index]; - } - - HOST_DEVICE - T - operator[](vsize_t index) const { - return data[index]; - } - - HOST_DEVICE - inline vsize_t - size() const noexcept { - return data.size(); - } - - DEVICE - static - VectorCU - getRandomVector(vsize_t dim){ - VectorCU retVal(dim); - printf("made retval\n"); - auto rands = getRandSeqCU(); - for(vsize_t x = 0; x < dim; ++x){ - retVal.data[x] = rands[x]; - } - return retVal; - } - - //predoncition: this only works with vectors of - //predetermined size, now 16 - DEVICE - VectorCU - genOrthoVector() const { - VectorCU retVal(data.size()); - auto shuff = get16ShuffledCU(); - for(vsize_t x = 0; x < data.size(); x += 2){ - retVal[shuff[x]] = data[shuff[x+1]]; - retVal[shuff[x+1]] = -data[shuff[x]]; - } - return retVal; - } - - HOST_DEVICE - VectorCU - rotateAboutZ_3d(T rads){ - MatrixCU t(3,3); - t[0][0]=ccos(rads); t[0][1]=-csin(rads); t[0][2]=0; - t[1][0]=csin(rads); t[1][1]=ccos(rads); t[1][2]=0; - t[2][0]=0; t[2][1]=0; t[2][2]=1; - return t * (*this); - } - - HOST_DEVICE - VectorCU - getUnitVector() const { - VectorCU retVal(*this); - return retVal * ((T)1.0 / (L2Norm())); - } - - HOST_DEVICE - bool - operator==(VectorCU const &b){ - if(this->data.size() != b.data.size()) return false; - for(vsize_t x = 0; x < b.data.size(); ++x){ - if(data[x] != b.data[x]) return false; - } - return true; - } - - HOST_DEVICE - T - L1Distance(VectorCU const &rhs) const { - T distance = 0; - for(vsize_t x = 0; x < data.size(); ++x){ - distance += std::abs(data[x] - 
rhs.data[x]); - } - return distance; - } - - HOST_DEVICE - T - operator^(VectorCU const &rhs) const { - T sum = 0.0; - for(vsize_t i = 0; i < data.size(); ++i){ - sum += data[i] * rhs.data[i]; - } - return sum; - } - - HOST_DEVICE - VectorCU - operator*(VectorCU const &rhs) const{ - VectorCU ret(data.size()); - for(vsize_t x = 0; x < data.size(); ++x){ - ret[x] = data[x] * rhs.data[x]; - } - return ret; - } - - HOST_DEVICE - VectorCU - operator*(T const& sca) const { - VectorCU ret(data.size()); - for(vsize_t x = 0; x < data.size(); ++x){ - ret[x] = data[x] * sca; - } - return ret; - } - - HOST_DEVICE - VectorCU - operator-(const VectorCU& rhs) const { - VectorCU retVal(data.size()); - for(vsize_t x = 0; - x < data.size(); - ++x){ - retVal.data[x] = data[x] - rhs.data[x]; - } - return retVal; - } - - HOST_DEVICE - T - LInfNorm() const { - T largest = 0; - for(vsize_t x = 0; - x < data.size(); - ++x){ - T tmp = abs(data[x]); - if(tmp > largest) largest = tmp; - } - return largest; - } - - HOST_DEVICE - T - LInfDistance(VectorCU const &rhs) const { - auto diff = operator-(rhs); - return diff.LInfNorm(); - } - - //TODO this assumes there is only float and double on - //CUDA (may change for half precision) - HOST_DEVICE - T - L2Norm() const { - VectorCU squares = (*this) * (*this); - T retVal = (T)0.0; - for(vsize_t x = 0; - x < data.size(); - ++x) retVal += squares.data[x]; - if(sizeof(T) == 4) return sqrtf(retVal); - else return sqrt(retVal); - } - - T - HOST_DEVICE - L2Distance(VectorCU const &rhs) const { - return ((*this) - rhs).L2Norm(); - } - - HOST_DEVICE - VectorCU - cross(VectorCU const &rhs) const { - VectorCU retVal(data.size()); - retVal.data[0] = data[1] * rhs.data[2] - rhs.data[1] * data[2]; - retVal.data[1] = rhs.data[0] * data[2] - data[0] * rhs.data[2]; - retVal.data[2] = data[0] * rhs.data[1] - rhs.data[0] * data[1]; - return retVal; - } - - HOST_DEVICE - bool - isOrtho(VectorCU const &rhs){ - return operator^(rhs) == (T)0; - } -}; - -template -class 
MatrixCU { - using rdtype = cuvector; - cuvector data; -public: - using vsize_t = typename cuvector::cvs_t; - - HOST_DEVICE - MatrixCU(vsize_t rows, vsize_t cols): - data(rows, cuvector(cols,0)){} - - HOST_DEVICE - inline - rdtype& - operator[](vsize_t indx){ - return data[indx]; - } - - HOST_DEVICE - inline - rdtype - operator[](vsize_t indx) const { - return data[indx]; - } - - HOST_DEVICE - bool - operator==(MatrixCU const &rhs) const { - if(data.size() != rhs.data.size()) return false; - bool retVal = true; - for(vsize_t x = 0; x < data.size(); ++x){ - for(vsize_t y = 0; y < data[0].size(); ++y){ - if(data[x][y] != rhs.data[x][y]){ - retVal = false; - break; - } - } - } - return retVal; - } - - HOST_DEVICE - MatrixCU - operator*(T const &sca){ - MatrixCU retVal(data.size(), data[0].size()); - for(vsize_t x = 0; x < data.size(); ++x){ - for(vsize_t y = 0; y < data[0].size(); ++y){ - retVal.data[x][y] = data[x][y] * sca; - } - } - return retVal; - } - - HOST_DEVICE - MatrixCU - operator*(MatrixCU const &rhs){ - MatrixCU retVal(data.size(), rhs.data[0].size()); - for(vsize_t bcol = 0; bcol < rhs.data[0].size(); ++bcol){ - for(vsize_t x = 0; x < data.size(); ++x){ - for(vsize_t y = 0; y < data[0].size(); ++y){ - retVal.data[x][bcol] += data[x][y] * rhs.data[y][bcol]; - } - } - } - return retVal; - } - - HOST_DEVICE - static - MatrixCU - SkewSymCrossProdM(VectorCU const &v){ - MatrixCU retVal(3,3); - retVal[0][0] = 0; - retVal[0][1] = -v[2]; - retVal[0][2] = v[1]; - - retVal[0][0] = v[2]; - retVal[1][1] = 0; - retVal[2][2] = -v[0]; - - retVal[0][0] = -v[1]; - retVal[1][1] = v[0]; - retVal[2][2] = 0; - - return retVal; - } - - HOST_DEVICE - static - MatrixCU - Identity(size_t dims){ - MatrixCU retVal(dims, dims); - for(size_t x = 0; x < dims; ++x){ - for(size_t y = 0; y < dims; ++y){ - if(x == y) retVal[x][y] = 1; - else retVal[x][y] = 0; - } - } - return retVal; - } - - HOST_DEVICE - VectorCU - operator*(VectorCU const &v) const { - VectorCU 
retVal((vsize_t)data.size()); - vsize_t resI = 0; - for(vsize_t x = 0; - x < data.size(); - ++x){ - auto row = data[x]; - for(vsize_t i = 0; i < row.size(); ++i){ - retVal[resI] += row[i] * v[i]; - } - ++resI; - } - return retVal; - } - - HOST_DEVICE - MatrixCU - operator+(MatrixCU const&rhs) const{ - MatrixCU retVal(data.size(), data.size()); - int x = 0; int y = 0; - for(vsize_t j = 0; - j < data.size(); - ++j){ - auto r = data[j]; - for(vsize_t k = 0; - k < data.size(); - ++k){ - auto i = r[k]; - retVal[x][y] = i + rhs[x][y]; - ++y; - } - y = 0; ++x; - } - return retVal; - } -}; - } // end of namespace flit #endif // CU_HELPERS_HPP diff --git a/src/MatrixCU.h b/src/MatrixCU.h new file mode 100644 index 00000000..664529b3 --- /dev/null +++ b/src/MatrixCU.h @@ -0,0 +1,152 @@ +#ifndef MATRIX_CU_H +#define MATRIX_CU_H + +#include "CUHelpers.h" +#include "VectorCU.h" +#include "cuvector.h" + +namespace flit { + +template +class MatrixCU { + using rdtype = cuvector; + cuvector data; +public: + using vsize_t = typename cuvector::cvs_t; + + HOST_DEVICE + MatrixCU(vsize_t rows, vsize_t cols): + data(rows, cuvector(cols,0)){} + + HOST_DEVICE + inline + rdtype& + operator[](vsize_t indx){ + return data[indx]; + } + + HOST_DEVICE + inline + rdtype + operator[](vsize_t indx) const { + return data[indx]; + } + + HOST_DEVICE + bool + operator==(MatrixCU const &rhs) const { + if(data.size() != rhs.data.size()) return false; + bool retVal = true; + for(vsize_t x = 0; x < data.size(); ++x){ + for(vsize_t y = 0; y < data[0].size(); ++y){ + if(data[x][y] != rhs.data[x][y]){ + retVal = false; + break; + } + } + } + return retVal; + } + + HOST_DEVICE + MatrixCU + operator*(T const &sca){ + MatrixCU retVal(data.size(), data[0].size()); + for(vsize_t x = 0; x < data.size(); ++x){ + for(vsize_t y = 0; y < data[0].size(); ++y){ + retVal.data[x][y] = data[x][y] * sca; + } + } + return retVal; + } + + HOST_DEVICE + MatrixCU + operator*(MatrixCU const &rhs){ + MatrixCU retVal(data.size(), 
rhs.data[0].size()); + for(vsize_t bcol = 0; bcol < rhs.data[0].size(); ++bcol){ + for(vsize_t x = 0; x < data.size(); ++x){ + for(vsize_t y = 0; y < data[0].size(); ++y){ + retVal.data[x][bcol] += data[x][y] * rhs.data[y][bcol]; + } + } + } + return retVal; + } + + HOST_DEVICE + static + MatrixCU + SkewSymCrossProdM(VectorCU const &v){ + MatrixCU retVal(3,3); + retVal[0][0] = 0; + retVal[0][1] = -v[2]; + retVal[0][2] = v[1]; + + retVal[0][0] = v[2]; + retVal[1][1] = 0; + retVal[2][2] = -v[0]; + + retVal[0][0] = -v[1]; + retVal[1][1] = v[0]; + retVal[2][2] = 0; + + return retVal; + } + + HOST_DEVICE + static + MatrixCU + Identity(size_t dims){ + MatrixCU retVal(dims, dims); + for(size_t x = 0; x < dims; ++x){ + for(size_t y = 0; y < dims; ++y){ + if(x == y) retVal[x][y] = 1; + else retVal[x][y] = 0; + } + } + return retVal; + } + + HOST_DEVICE + VectorCU + operator*(VectorCU const &v) const { + VectorCU retVal((vsize_t)data.size()); + vsize_t resI = 0; + for(vsize_t x = 0; + x < data.size(); + ++x){ + auto row = data[x]; + for(vsize_t i = 0; i < row.size(); ++i){ + retVal[resI] += row[i] * v[i]; + } + ++resI; + } + return retVal; + } + + HOST_DEVICE + MatrixCU + operator+(MatrixCU const&rhs) const{ + MatrixCU retVal(data.size(), data.size()); + int x = 0; int y = 0; + for(vsize_t j = 0; + j < data.size(); + ++j){ + auto r = data[j]; + for(vsize_t k = 0; + k < data.size(); + ++k){ + auto i = r[k]; + retVal[x][y] = i + rhs[x][y]; + ++y; + } + y = 0; ++x; + } + return retVal; + } +}; + +} // end of namespace flit + +#endif // MATRIX_CU_H diff --git a/src/Vector.h b/src/Vector.h index 577ae791..cc3dd4e0 100644 --- a/src/Vector.h +++ b/src/Vector.h @@ -1,6 +1,8 @@ #ifndef FLIT_VECTOR_H #define FLIT_VECTOR_H +#include "flitHelpers.h" + #include // for std::generate #include // for std::sqrt #include // for std::abs diff --git a/src/VectorCU.h b/src/VectorCU.h new file mode 100644 index 00000000..d57e759f --- /dev/null +++ b/src/VectorCU.h @@ -0,0 +1,219 @@ +#ifndef 
VECTOR_CU_H +#define VECTOR_CU_H + +#include "CUHelpers.h" +#include "cuvector.h" + +namespace flit { + +template +class MatrixCU; + +template +class VectorCU { + cuvector data; + friend class MatrixCU; +public: + using vsize_t = typename cuvector::cvs_t; + + HOST_DEVICE + explicit + VectorCU(vsize_t dim) : data(dim) {} + HOST VectorCU(std::initializer_list l) : data(l) {} + HOST_DEVICE VectorCU(const T* array, vsize_t size) : data(array, size) {} + + // copy support + HOST_DEVICE VectorCU(const VectorCU& rhs):data(rhs.data){} + HOST_DEVICE VectorCU(const cuvector& vals):data(vals){} + HOST_DEVICE VectorCU& operator=(const VectorCU& rhs) { data = rhs.data; return *this; } + HOST_DEVICE VectorCU& operator=(const cuvector& vals) { data = vals; return *this; } + + // move support + HOST_DEVICE VectorCU(VectorCU&& rhs):data(std::move(rhs.data)){} + HOST_DEVICE VectorCU(cuvector&& vals):data(std::move(vals)){} + HOST_DEVICE VectorCU& operator=(VectorCU&& rhs) { data = std::move(rhs.data); return *this; } + HOST_DEVICE VectorCU& operator=(cuvector&& vals) { data = std::move(vals); return *this; } + + HOST_DEVICE + T& + operator[](vsize_t index){ + return data[index]; + } + + HOST_DEVICE + T + operator[](vsize_t index) const { + return data[index]; + } + + HOST_DEVICE + inline vsize_t + size() const noexcept { + return data.size(); + } + + DEVICE + static + VectorCU + getRandomVector(vsize_t dim){ + VectorCU retVal(dim); + auto rands = getRandSeqCU(); + for(vsize_t x = 0; x < dim; ++x){ + retVal.data[x] = rands[x]; + } + return retVal; + } + + //predoncition: this only works with vectors of + //predetermined size, now 16 + DEVICE + VectorCU + genOrthoVector() const { + VectorCU retVal(data.size()); + auto shuff = get16ShuffledCU(); + for(vsize_t x = 0; x < data.size(); x += 2){ + retVal[shuff[x]] = data[shuff[x+1]]; + retVal[shuff[x+1]] = -data[shuff[x]]; + } + return retVal; + } + + HOST_DEVICE + VectorCU + rotateAboutZ_3d(T rads){ + MatrixCU t(3,3); + 
t[0][0]=ccos(rads); t[0][1]=-csin(rads); t[0][2]=0; + t[1][0]=csin(rads); t[1][1]=ccos(rads); t[1][2]=0; + t[2][0]=0; t[2][1]=0; t[2][2]=1; + return t * (*this); + } + + HOST_DEVICE + VectorCU + getUnitVector() const { + VectorCU retVal(*this); + return retVal * ((T)1.0 / (L2Norm())); + } + + HOST_DEVICE + bool + operator==(VectorCU const &b){ + if(this->data.size() != b.data.size()) return false; + for(vsize_t x = 0; x < b.data.size(); ++x){ + if(data[x] != b.data[x]) return false; + } + return true; + } + + HOST_DEVICE + T + L1Distance(VectorCU const &rhs) const { + T distance = 0; + for(vsize_t x = 0; x < data.size(); ++x){ + distance += std::abs(data[x] - rhs.data[x]); + } + return distance; + } + + HOST_DEVICE + T + operator^(VectorCU const &rhs) const { + T sum = 0.0; + for(vsize_t i = 0; i < data.size(); ++i){ + sum += data[i] * rhs.data[i]; + } + return sum; + } + + HOST_DEVICE + VectorCU + operator*(VectorCU const &rhs) const{ + VectorCU ret(data.size()); + for(vsize_t x = 0; x < data.size(); ++x){ + ret[x] = data[x] * rhs.data[x]; + } + return ret; + } + + HOST_DEVICE + VectorCU + operator*(T const& sca) const { + VectorCU ret(data.size()); + for(vsize_t x = 0; x < data.size(); ++x){ + ret[x] = data[x] * sca; + } + return ret; + } + + HOST_DEVICE + VectorCU + operator-(const VectorCU& rhs) const { + VectorCU retVal(data.size()); + for(vsize_t x = 0; + x < data.size(); + ++x){ + retVal.data[x] = data[x] - rhs.data[x]; + } + return retVal; + } + + HOST_DEVICE + T + LInfNorm() const { + T largest = 0; + for(vsize_t x = 0; + x < data.size(); + ++x){ + T tmp = abs(data[x]); + if(tmp > largest) largest = tmp; + } + return largest; + } + + HOST_DEVICE + T + LInfDistance(VectorCU const &rhs) const { + auto diff = operator-(rhs); + return diff.LInfNorm(); + } + + //TODO this assumes there is only float and double on + //CUDA (may change for half precision) + HOST_DEVICE + T + L2Norm() const { + VectorCU squares = (*this) * (*this); + T retVal = (T)0.0; + 
for(vsize_t x = 0; + x < data.size(); + ++x) retVal += squares.data[x]; + if(sizeof(T) == 4) return sqrtf(retVal); + else return sqrt(retVal); + } + + T + HOST_DEVICE + L2Distance(VectorCU const &rhs) const { + return ((*this) - rhs).L2Norm(); + } + + HOST_DEVICE + VectorCU + cross(VectorCU const &rhs) const { + VectorCU retVal(data.size()); + retVal.data[0] = data[1] * rhs.data[2] - rhs.data[1] * data[2]; + retVal.data[1] = rhs.data[0] * data[2] - data[0] * rhs.data[2]; + retVal.data[2] = data[0] * rhs.data[1] - rhs.data[0] * data[1]; + return retVal; + } + + HOST_DEVICE + bool + isOrtho(VectorCU const &rhs){ + return operator^(rhs) == (T)0; + } +}; + +} // end of namespace flit + +#endif // VECTOR_CU_H diff --git a/src/CUVector.h b/src/cuvector.h similarity index 78% rename from src/CUVector.h rename to src/cuvector.h index 6dd99c7a..c38364db 100644 --- a/src/CUVector.h +++ b/src/cuvector.h @@ -1,5 +1,5 @@ -#ifndef CU_VECTOR_HPP -#define CU_VECTOR_HPP +#ifndef CUVECTOR_H +#define CUVECTOR_H #include "CUHelpers.h" @@ -89,45 +89,6 @@ class cuvector { // reuse the move assignment operator and copy constructor HOST_DEVICE cuvector& operator=(const cuvector& rhs) { *this = cuvector(rhs); return *this; } HOST cuvector& operator=(const std::vector& rhs) { *this = cuvector(rhs); return *this; } -// HOST_DEVICE -// cuvector& -// operator=(const cuvector& rhs){ -// if (tsize > 0) delete[] _data; -// tsize = 0; -// vsize = rhs.vsize; -// if (vsize > 0) { -// _data = new T[vsize]; -// invalid = _data == nullptr; -// if (!invalid) { -// for (cvs_t x = 0; x < vsize; ++x) { -// _data[x] = rhs[x]; -// } -// tsize=vsize; -// } -// } -// return *this; -// } -// -// HOST -// cuvector& -// operator=(const std::vector& rhs){ -// // Reuse the move assignment operator and copy constructor -// *this = cuvector(rhs); -// return *this; -// if (tsize > 0) delete[] _data; -// vsize = rhs.size(); -// if (vsize > 0) { -// _data = new T[vsize]; -// invalid = _data == nullptr; -// if(!invalid){ 
-// for(cvs_t x = 0; x < vsize; ++x){ -// _data[x] = rhs[x]; -// } -// tsize=vsize; -// } -// } -// return *this; -// } // move support // Unfortunately, we cannot provide moves from std::vector @@ -189,13 +150,10 @@ class cuvector { HOST_DEVICE inline void emplace_back(Args&&... args){ - printf("hi from emplace\n"); if(vsize == tsize) grow(); - printf("emp2\n"); if(!invalid){ _data[vsize++] = T(std::forward(args)...); } - printf("emp3\n"); } HOST_DEVICE @@ -223,4 +181,4 @@ class cuvector { size() const noexcept {return vsize;} }; -#endif // CU_VECTOR_HPP +#endif // CUVECTOR_H diff --git a/src/flit.h b/src/flit.h index 7a42ffd9..440301d9 100644 --- a/src/flit.h +++ b/src/flit.h @@ -4,10 +4,12 @@ #ifndef FLIT_H #define FLIT_H 0 -#include "flitHelpers.h" #include "Matrix.h" +#include "MatrixCU.h" #include "TestBase.h" #include "Vector.h" +#include "VectorCU.h" +#include "flitHelpers.h" #ifdef __CUDA__ //#include From 84eea5c5546f2a4eb48ba28e2d7b06448b91b707 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Fri, 26 Jan 2018 11:32:58 -0700 Subject: [PATCH 20/20] paranoia: get rid of unused variable warning --- litmus-tests/tests/DoHariGSImproved.cpp | 2 +- litmus-tests/tests/Paranoia.cpp | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/litmus-tests/tests/DoHariGSImproved.cpp b/litmus-tests/tests/DoHariGSImproved.cpp index ea3e24b1..41215bc1 100644 --- a/litmus-tests/tests/DoHariGSImproved.cpp +++ b/litmus-tests/tests/DoHariGSImproved.cpp @@ -1,4 +1,4 @@ -#include "flit.h" +#include #include #include diff --git a/litmus-tests/tests/Paranoia.cpp b/litmus-tests/tests/Paranoia.cpp index 471742ca..6b5cfa79 100644 --- a/litmus-tests/tests/Paranoia.cpp +++ b/litmus-tests/tests/Paranoia.cpp @@ -333,7 +333,7 @@ flit::Variant Paranoia::run_impl(const std::vector& ti) TimeoutStatus = 5, OverflowStatus = 6, }; - auto status = ExitStatus::SuccessStatus; + //auto status = ExitStatus::SuccessStatus; /* First two assignments use integer right-hand 
sides. */ zero = 0; @@ -1839,32 +1839,32 @@ flit::Variant Paranoia::run_impl(const std::vector& ti) catch (const TimeoutError &e) { FLIT_UNUSED(e); info_stream << id << ": timeout error occurred" << endl; - status = ExitStatus::TimeoutStatus; + //status = ExitStatus::TimeoutStatus; } catch (const FailureError &e) { FLIT_UNUSED(e); info_stream << id << ": failure error occurred" << endl; - status = ExitStatus::FailureStatus; + //status = ExitStatus::FailureStatus; } catch (const SeriousError &e) { FLIT_UNUSED(e); info_stream << id << ": serious error occurred" << endl; - status = ExitStatus::SeriousStatus; + //status = ExitStatus::SeriousStatus; } catch (const DefectError &e) { FLIT_UNUSED(e); info_stream << id << ": defect error occurred" << endl; - status = ExitStatus::DefectStatus; + //status = ExitStatus::DefectStatus; } catch (const FlawError &e) { FLIT_UNUSED(e); info_stream << id << ": flaw error occurred" << endl; - status = ExitStatus::FlawStatus; + //status = ExitStatus::FlawStatus; } catch (const OverflowError &e) { FLIT_UNUSED(e); info_stream << id << ": overflow error occurred" << endl; - status = ExitStatus::OverflowStatus; + //status = ExitStatus::OverflowStatus; } return Milestone;