diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml new file mode 100644 index 0000000..881a518 --- /dev/null +++ b/.github/workflows/run_tests.yml @@ -0,0 +1,16 @@ +name: RunUnitTest + +on: [push, pull_request] + +jobs: + run-tests: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Build and Run Tests + working-directory: test + run: | + make all diff --git a/.gitignore b/.gitignore index 259148f..e2c2388 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,6 @@ *.exe *.out *.app + +# Tests +test/*/bin diff --git a/include/internal/defines.h b/include/internal/defines.h index 6c27dfa..8be3730 100644 --- a/include/internal/defines.h +++ b/include/internal/defines.h @@ -7,9 +7,9 @@ #pragma once // XTD_DEVICE_FUNCTION -#if defined(__CUDACC__) || defined(__HIPCC__) +#if defined(__CUDACC__) || defined(__HIPCC__) // CUDA or HIP/ROCm compiler -#define XTD_DEVICE_FUNCTION __host__ __define__ +#define XTD_DEVICE_FUNCTION __host__ __device__ #else // SYCL or standard C++ code #define XTD_DEVICE_FUNCTION @@ -29,3 +29,7 @@ // standard C++ code #define XTD_TARGET_CPU #endif + +#if defined(__SYCL_DEVICE_ONLY__) +#include +#endif diff --git a/include/math/sin.h b/include/math/sin.h index 92e8f4f..de7f2a4 100644 --- a/include/math/sin.h +++ b/include/math/sin.h @@ -7,6 +7,8 @@ #pragma once #include "internal/defines.h" +#include +#include namespace xtd { @@ -53,9 +55,8 @@ namespace xtd { /* Computes the sine of arg (measured in radians), * in double precision. */ - template > - XTD_DEVICE_FUNCTION - inline constexpr double sin(T arg) { + template >> + XTD_DEVICE_FUNCTION inline constexpr double sin(T arg) { return sin(static_cast(arg)); } @@ -63,17 +64,14 @@ namespace xtd { * in single precision. 
*/ XTD_DEVICE_FUNCTION - inline constexpr float sinf(float arg) { - return sin(arg); - } + inline constexpr float sinf(float arg) { return sin(arg); } /* Computes the sine of arg (measured in radians), * in single precision. */ - template > - XTD_DEVICE_FUNCTION - inline constexpr double sinf(T arg) { + template >> + XTD_DEVICE_FUNCTION inline constexpr double sinf(T arg) { return sin(static_cast(arg)); } -} // namespace xtd +} // namespace xtd diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..404e240 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,189 @@ +.PHONY: all build run clean + +all: build run + +# gcc +CXX := g++ +GCC_TOOLCHAIN := $(abspath $(dir $(shell which $(CXX)))/..) +GCC_TARGET := $(shell $(CXX) -dumpmachine) +# Catch2 needs -Wno-unused-variable +HOST_CXXFLAGS := -O2 -fPIC -pthread -march=native -Wall -Wextra -Werror -Wfatal-errors -Wno-unused-variable + +# Compiler flags supported by GCC but not by the LLVM-based compilers (clang, hipcc, icpx, etc.) 
+LLVM_UNSUPPORTED_CXXFLAGS := --param vect-max-version-for-alias-checks=50 -Werror=format-contains-nul -Wno-non-template-friend -Werror=return-local-addr -Werror=unused-but-set-variable + +CXXFLAGS := -std=c++17 $(HOST_CXXFLAGS) -g +LDFLAGS := -O2 -fPIC -pthread -Wl,-E -lstdc++fs -ldl + +# CUDA: look for the toolkit at the default prefix; CUDA_BASE is cleared when it is missing so the test rules can skip the CUDA build +CUDA_BASE := /usr/local/cuda +ifeq ($(wildcard $(CUDA_BASE)),) + # CUDA platform not found + $(warning Cannot find an NVIDIA CUDA installation at $(CUDA_BASE)) + CUDA_BASE := +else +# CUDA platform at $(CUDA_BASE); CUFLAGS_template builds the nvcc flags for the architectures given as its first argument, using its second argument as a variable-name prefix + CUDA_LIBDIR := $(CUDA_BASE)/lib64 + CUDA_DEPS := $(CUDA_LIBDIR)/libcudart.so + CUDA_ARCH := 60 70 80 + CUDA_CXXFLAGS := -I$(CUDA_BASE)/include + CUDA_LDFLAGS := -L$(CUDA_LIBDIR) -lcudart -lcudadevrt + CUDA_NVCC := $(CUDA_BASE)/bin/nvcc + define CUFLAGS_template + $(2)NVCC_FLAGS := $$(foreach ARCH,$(1),-gencode arch=compute_$$(ARCH),code=[sm_$$(ARCH),compute_$$(ARCH)]) -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored --expt-relaxed-constexpr --expt-extended-lambda --generate-line-info --source-in-ptx --display-error-number --threads $$(words $(1)) --cudart=shared + $(2)NVCC_COMMON := -std=c++17 -O3 -g $$($(2)NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS)' + $(2)CUDA_CUFLAGS := $$($(2)NVCC_COMMON) + endef + $(eval $(call CUFLAGS_template,$(CUDA_ARCH),)) + NVCC_COMMON := -std=c++17 -O3 -g $(NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS)' + CUDA_CUFLAGS := $(NVCC_COMMON) +endif + +# ROCm: look for the toolkit at the default prefix; ROCM_BASE is cleared when it is missing so the test rules can skip the HIP build +ROCM_BASE := /opt/rocm +ifeq ($(wildcard $(ROCM_BASE)),) + # ROCm platform not found + $(warning Cannot find an AMD ROCm installation at $(ROCM_BASE)) + ROCM_BASE := +else + # ROCm platform at $(ROCM_BASE); hipcc reuses the host CXXFLAGS minus the GCC-only options + ROCM_LIBDIR := $(ROCM_BASE)/lib + ROCM_DEPS := $(ROCM_LIBDIR)/libamdhip64.so + ROCM_ARCH := gfx900 gfx90a gfx1030 + ROCM_HIPCC := $(ROCM_BASE)/bin/hipcc + HIPCC_CXXFLAGS := -fno-gpu-rdc $(foreach ARCH,$(ROCM_ARCH),--offload-arch=$(ARCH)) $(filter-out $(LLVM_UNSUPPORTED_CXXFLAGS),$(CXXFLAGS)) 
--target=$(GCC_TARGET) --gcc-toolchain=$(GCC_TOOLCHAIN) -I$(ROCM_BASE)/include/hip -Wno-unused-result + HIPCC_LDFLAGS := $(LDFLAGS) --target=$(GCC_TARGET) --gcc-toolchain=$(GCC_TOOLCHAIN) +endif + +# oneAPI: look for the Intel toolkit at the default prefix; ONEAPI_BASE is cleared when it is missing so the test rules can skip the SYCL builds +ONEAPI_BASE := /opt/intel/oneapi +ifeq ($(wildcard $(ONEAPI_BASE)),) + # Intel oneAPI not available + $(warning Cannot find an Intel oneAPI installation at $(ONEAPI_BASE)) + ONEAPI_BASE := +else + SYCL_BASE := $(ONEAPI_BASE)/compiler/latest + SYCL_LIBDIR := $(SYCL_BASE)/lib + SYCL_CXX := $(SYCL_BASE)/bin/icpx + SYCL_CPU_TARGET := -fsycl-targets=spir64_x86_64 + SYCL_FLAGS := -fsycl -fp-model=precise + SYCL_CXXFLAGS := $(filter-out $(LLVM_UNSUPPORTED_CXXFLAGS),$(CXXFLAGS)) $(SYCL_FLAGS) -Wno-unused-variable + SYCL_LDFLAGS := + + # Check for Intel GPU existence: save the sycl-ls output in a temporary file and count its ext_oneapi_level_zero entries + SYCL_LS := $(shell mktemp) + $(shell sycl-ls > $(SYCL_LS)) + INTEL_GPU_EXISTS := $(shell cat $(SYCL_LS) | grep -c ext_oneapi_level_zero) + # Set AOT_INTEL_TARGETS from the device names in the sycl-ls output; without a GPU both INTEL_GPU_EXISTS and AOT_INTEL_TARGETS are left empty + ifneq ($(INTEL_GPU_EXISTS),0) + ifneq ($(shell cat $(SYCL_LS) | grep -c 'GPU Flex'),0) + AOT_INTEL_TARGETS := -fsycl-targets=intel_gpu_acm_g10 + else ifneq ($(shell cat $(SYCL_LS) | grep -c 'GPU Max'),0) + AOT_INTEL_TARGETS := -fsycl-targets=intel_gpu_pvc + else + # neither a Flex nor a Max GPU was listed: use the generic spir64 target and rely on JIT + AOT_INTEL_TARGETS := -fsycl-targets=spir64 + endif + else + INTEL_GPU_EXISTS := + AOT_INTEL_TARGETS := + endif + $(shell rm -f $(SYCL_LS)) + undefine SYCL_LS +endif + +# xtd: parent of the directory that holds the last makefile in MAKEFILE_LIST (presumably this Makefile, i.e. the repository root) +XTD_BASE := $(realpath $(dir $(realpath $(lastword $(MAKEFILE_LIST))))/..) 
+ +# external Catch2 library: single header downloaded on first use; a failed download is removed so that the next run retries +CATCH2_INCLUDE := $(XTD_BASE)/test/external/catch2/include/catch.hpp + +external_catch2: $(CATCH2_INCLUDE) + +$(CATCH2_INCLUDE): + mkdir -p $(dir $@) + wget https://github.com/catchorg/Catch2/releases/download/v2.13.8/catch.hpp -O $@ || { rm -f $@; exit 1; } + +LIB_INCLUDE := -I$(XTD_BASE)/include -I$(XTD_BASE)/test -I$(dir $(CATCH2_INCLUDE)) + +# xtd tests: every sub-directory of test/ except common and external is one test. TEST_template is instantiated once per test through eval/call, so every reference that must be resolved only when the rule or recipe is read is written with doubled dollar signs +SUBDIRS := $(wildcard $(XTD_BASE)/test/*/) +TARGETS_ALL := $(filter-out common, $(filter-out external, $(notdir $(patsubst %/,%,$(SUBDIRS))))) + +define TEST_template +$(1)/bin: + mkdir -p $(1)/bin + +$(1)Test: external_catch2 $(1)/bin/$(1)_t_cc $(1)/bin/$(1)_t_cuda $(1)/bin/$(1)_t_hip $(1)/bin/$(1)_t_cpusycl $(1)/bin/$(1)_t_gpusycl + +$(1)/bin/$(1)_t_cc: $(1)/$(1)_t.cc | $(1)/bin $(CATCH2_INCLUDE) + $(CXX) $(CXXFLAGS) $(LIB_INCLUDE) $$< -o $$@ + +$(1)/bin/$(1)_t_cuda: $(1)/$(1)_t.cu | $(1)/bin $(CATCH2_INCLUDE) + @if [ -z "$(CUDA_BASE)" ]; then \ + echo "Error: CUDA_BASE not set. Skipping $$@"; \ + else \ + echo "$(CUDA_NVCC) $(CUDA_CXXFLAGS) $(CUDA_LDFLAGS) $(CUDA_CUFLAGS) $(LIB_INCLUDE) $$< -o $$@"; \ + $(CUDA_NVCC) $(CUDA_CXXFLAGS) $(CUDA_LDFLAGS) $(CUDA_CUFLAGS) $(LIB_INCLUDE) $$< -o $$@; \ + fi + +$(1)/bin/$(1)_t_hip: $(1)/$(1)_t.hip.cc | $(1)/bin $(CATCH2_INCLUDE) + @if [ -z "$(ROCM_BASE)" ]; then \ + echo "Error: ROCM_BASE not set. Skipping $$@"; \ + else \ + echo "$(ROCM_HIPCC) $(HIPCC_CXXFLAGS) $(HIPCC_LDFLAGS) $(LIB_INCLUDE) $$< -o $$@"; \ + $(ROCM_HIPCC) $(HIPCC_CXXFLAGS) $(HIPCC_LDFLAGS) $(LIB_INCLUDE) $$< -o $$@; \ + fi + +$(1)/bin/$(1)_t_cpusycl: $(1)/$(1)_t.sycl.cc | $(1)/bin $(CATCH2_INCLUDE) + @if [ -z "$(ONEAPI_BASE)" ]; then \ + echo "Error: ONEAPI_BASE not set. Skipping $$@"; \ + else \ + echo "$(SYCL_CXX) $(SYCL_CXXFLAGS) $(SYCL_LDFLAGS) $(SYCL_CPU_TARGET) -DONEAPI_CPU $(LIB_INCLUDE) $$< -o $$@"; \ + $(SYCL_CXX) $(SYCL_CXXFLAGS) $(SYCL_LDFLAGS) $(SYCL_CPU_TARGET) -DONEAPI_CPU $(LIB_INCLUDE) $$< -o $$@; \ + fi + +$(1)/bin/$(1)_t_gpusycl: $(1)/$(1)_t.sycl.cc | $(1)/bin $(CATCH2_INCLUDE) + @if [ -z "$(ONEAPI_BASE)" ]; then \ + echo "Error: ONEAPI_BASE not set. 
Skipping $$@"; \ + else \ + if [ "$(INTEL_GPU_EXISTS)" ]; then \ + echo "$(SYCL_CXX) $(SYCL_CXXFLAGS) $(SYCL_LDFLAGS) $(AOT_INTEL_TARGETS) $(LIB_INCLUDE) $$< -o $$@"; \ + $(SYCL_CXX) $(SYCL_CXXFLAGS) $(SYCL_LDFLAGS) $(AOT_INTEL_TARGETS) $(LIB_INCLUDE) $$< -o $$@; \ + else \ + echo "Error: Intel GPU not found. Skipping $$@"; \ + fi \ + fi + +# Directory holding the test executables; the run rule below executes every regular file in it and stops at the first failing test so that make reports the failure +$(1)_BIN := $(XTD_BASE)/test/$(1)/bin + +# Prerequisites of the run rule: the CPU test always, the other backends only when their toolkit (and, for SYCL GPU, a device) was detected, i.e. the corresponding variable is non-empty +TEST_EXECUTABLES := $(1)/bin/$(1)_t_cc +ifneq ($(CUDA_BASE),) + TEST_EXECUTABLES += $(1)/bin/$(1)_t_cuda +endif +ifneq ($(ROCM_BASE),) + TEST_EXECUTABLES += $(1)/bin/$(1)_t_hip +endif +ifneq ($(ONEAPI_BASE),) + TEST_EXECUTABLES += $(1)/bin/$(1)_t_cpusycl + ifneq ($(INTEL_GPU_EXISTS),) + TEST_EXECUTABLES += $(1)/bin/$(1)_t_gpusycl + endif +endif + +run_$(1)Test: $$(TEST_EXECUTABLES) + @for t in $$($(1)_BIN)/*; do [ -f "$$$$t" ] || continue; echo "Running $$$$t"; "$$$$t" || exit 1; echo; done + +clean_$(1)Test: + rm -rf $(1)/bin +endef + +$(foreach target,$(TARGETS_ALL),$(eval $(call TEST_template,$(target)))) + +build: $(foreach target,$(TARGETS_ALL), $(target)Test) + +run: $(foreach target,$(TARGETS_ALL), run_$(target)Test) + +clean: $(foreach target,$(TARGETS_ALL), clean_$(target)Test) diff --git a/test/common/cuda_check.h b/test/common/cuda_check.h new file mode 100644 index 0000000..98c71ea --- /dev/null +++ b/test/common/cuda_check.h @@ -0,0 +1,61 @@ +#pragma once + +// C++ standard headers +#include +#include +#include + +// CUDA headers +#include +#include + +namespace internal { + + [[noreturn]] inline void abortOnCudaError(const char* file, + int line, + const char* cmd, + const char* error, + const char* message, + const char* description = nullptr) { + std::ostringstream out; + out << "\n"; + out << file << ", line " << line << ":\n"; + out << "CUDA_CHECK(" << cmd << ");\n"; + out << error << ": " << message << "\n"; + if (description) + out << description << "\n"; + + throw std::runtime_error(out.str()); + } + + inline void cudaCheck(const char* file, + int line, 
const char* cmd, + CUresult result, + const char* description = nullptr) { + if (result == CUDA_SUCCESS) + return; + + const char* error; + const char* message; + cuGetErrorName(result, &error); + cuGetErrorString(result, &message); + abortOnCudaError(file, line, cmd, error, message, description); + } + + inline void cudaCheck(const char* file, + int line, + const char* cmd, + cudaError_t result, + const char* description = nullptr) { + if (result == cudaSuccess) + return; + + const char* error = cudaGetErrorName(result); + const char* message = cudaGetErrorString(result); + abortOnCudaError(file, line, cmd, error, message, description); + } + +} // namespace internal + +#define CUDA_CHECK(ARG, ...) (internal::cudaCheck(__FILE__, __LINE__, #ARG, (ARG), ##__VA_ARGS__)) diff --git a/test/common/hip_check.h b/test/common/hip_check.h new file mode 100644 index 0000000..e7d8680 --- /dev/null +++ b/test/common/hip_check.h @@ -0,0 +1,45 @@ +#pragma once + +// C++ standard headers +#include +#include +#include + +// HIP headers +#include + +namespace internal { + + [[noreturn]] inline void abortOnHipError(const char *file, + int line, + const char *cmd, + const char *error, + const char *message, + const char *description = nullptr) { + std::ostringstream out; + out << "\n"; + out << file << ", line " << line << ":\n"; + out << "HIP_CHECK(" << cmd << ");\n"; + out << error << ": " << message << "\n"; + if (description) + out << description << "\n"; + + throw std::runtime_error(out.str()); + } + + inline void hipCheck(const char *file, + int line, + const char *cmd, + hipError_t result, + const char *description = nullptr) { + if (result == hipSuccess) + return; + + const char *error = hipGetErrorName(result); + const char *message = hipGetErrorString(result); + abortOnHipError(file, line, cmd, error, message, description); + } + +} // namespace internal + +#define HIP_CHECK(ARG, ...) 
(internal::hipCheck(__FILE__, __LINE__, #ARG, (ARG), ##__VA_ARGS__)) diff --git a/test/sin/sin_t.cc b/test/sin/sin_t.cc new file mode 100644 index 0000000..01791f6 --- /dev/null +++ b/test/sin/sin_t.cc @@ -0,0 +1,31 @@ +// C++ standard headers +#include +#include +#include + +// Catch2 headers +#define CATCH_CONFIG_MAIN +#include + +// xtd headers +#include "math.h" + +TEST_CASE("sinCPU", "[sin]") { + auto const epsilon = std::numeric_limits::epsilon(); + auto const epsilon_f = std::numeric_limits::epsilon(); + + std::vector values{-1., 0., M_PI / 2, M_PI, 42.}; + + for (auto &v : values) { + REQUIRE_THAT(xtd::sin(static_cast(v)), + Catch::Matchers::WithinAbs(std::sin(static_cast(v)), epsilon)); + REQUIRE_THAT(xtd::sin(static_cast(v)), + Catch::Matchers::WithinAbs(std::sin(v), epsilon_f)); + REQUIRE_THAT(xtd::sin(static_cast(v)), + Catch::Matchers::WithinAbs(std::sin(v), epsilon)); + REQUIRE_THAT(xtd::sinf(static_cast(v)), + Catch::Matchers::WithinAbs(sinf(static_cast(v)), epsilon_f)); + REQUIRE_THAT(xtd::sinf(static_cast(v)), Catch::Matchers::WithinAbs(sinf(v), epsilon_f)); + REQUIRE_THAT(xtd::sinf(static_cast(v)), Catch::Matchers::WithinAbs(sinf(v), epsilon_f)); + } +} diff --git a/test/sin/sin_t.cu b/test/sin/sin_t.cu new file mode 100644 index 0000000..6ddcc56 --- /dev/null +++ b/test/sin/sin_t.cu @@ -0,0 +1,80 @@ +// C++ standard headers +#include +#include +#include + +// CUDA headers +#include + +// Catch2 headers +#define CATCH_CONFIG_MAIN +#include + +// xtd headers +#include "math.h" + +// test headers +#include "common/cuda_check.h" + +template +__global__ void sinKernel(double *result, T input) { + *result = static_cast(xtd::sin(input)); +} + +template +__global__ void sinfKernel(double *result, T input) { + *result = static_cast(xtd::sinf(input)); +} + +TEST_CASE("sinCUDA", "[sin]") { + int deviceCount; + cudaError_t cudaStatus = cudaGetDeviceCount(&deviceCount); + + if (cudaStatus != cudaSuccess || deviceCount == 0) { + std::cout << "No NVIDIA GPUs 
found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + + CUDA_CHECK(cudaSetDevice(0)); + cudaStream_t q; + CUDA_CHECK(cudaStreamCreate(&q)); + + std::vector values{-1., 0., M_PI / 2, M_PI, 42.}; + + double *result; + int constexpr N = 6; + CUDA_CHECK(cudaMallocAsync(&result, N * sizeof(double), q)); + + for (auto v : values) { + CUDA_CHECK(cudaMemsetAsync(result, 0x00, N * sizeof(double), q)); + + sinKernel<<<1, 1, 0, q>>>(result + 0, static_cast(v)); + CUDA_CHECK(cudaGetLastError()); + sinKernel<<<1, 1, 0, q>>>(result + 1, static_cast(v)); + CUDA_CHECK(cudaGetLastError()); + sinKernel<<<1, 1, 0, q>>>(result + 2, static_cast(v)); + CUDA_CHECK(cudaGetLastError()); + sinfKernel<<<1, 1, 0, q>>>(result + 3, static_cast(v)); + CUDA_CHECK(cudaGetLastError()); + sinfKernel<<<1, 1, 0, q>>>(result + 4, static_cast(v)); + CUDA_CHECK(cudaGetLastError()); + sinfKernel<<<1, 1, 0, q>>>(result + 5, static_cast(v)); + CUDA_CHECK(cudaGetLastError()); + + double resultHost[N]; + CUDA_CHECK(cudaMemcpyAsync(resultHost, result, N * sizeof(double), cudaMemcpyDeviceToHost, q)); + CUDA_CHECK(cudaStreamSynchronize(q)); + + auto const epsilon = std::numeric_limits::epsilon(); + auto const epsilon_f = std::numeric_limits::epsilon(); + REQUIRE_THAT(resultHost[0], Catch::Matchers::WithinAbs(std::sin(static_cast(v)), epsilon)); + REQUIRE_THAT(resultHost[1], Catch::Matchers::WithinAbs(std::sin(v), epsilon_f)); + REQUIRE_THAT(resultHost[2], Catch::Matchers::WithinAbs(std::sin(v), epsilon)); + REQUIRE_THAT(resultHost[3], Catch::Matchers::WithinAbs(sinf(static_cast(v)), epsilon_f)); + REQUIRE_THAT(resultHost[4], Catch::Matchers::WithinAbs(sinf(v), epsilon_f)); + REQUIRE_THAT(resultHost[5], Catch::Matchers::WithinAbs(sinf(v), epsilon_f)); + } + + CUDA_CHECK(cudaFreeAsync(result, q)); + CUDA_CHECK(cudaStreamDestroy(q)); +} diff --git a/test/sin/sin_t.hip.cc b/test/sin/sin_t.hip.cc new file mode 100644 index 0000000..c33f3c7 --- /dev/null +++ b/test/sin/sin_t.hip.cc @@ -0,0 +1,80 
@@ +// C++ standard headers +#include +#include +#include + +// HIP headers +#include + +// Catch2 headers +#define CATCH_CONFIG_MAIN +#include + +// xtd headers +#include "math.h" + +// test headers +#include "common/hip_check.h" + +template +__global__ void sinKernel(double *result, T input) { + *result = static_cast(xtd::sin(input)); +} + +template +__global__ void sinfKernel(double *result, T input) { + *result = static_cast(xtd::sinf(input)); +} + +TEST_CASE("sinHIP", "[sin]") { + int deviceCount; + hipError_t hipStatus = hipGetDeviceCount(&deviceCount); + + if (hipStatus != hipSuccess || deviceCount == 0) { + std::cout << "No AMD GPUs found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + + HIP_CHECK(hipSetDevice(0)); + hipStream_t q; + HIP_CHECK(hipStreamCreate(&q)); + + std::vector values{-1., 0., M_PI / 2, M_PI, 42.}; + + double *result; + int constexpr N = 6; + HIP_CHECK(hipMallocAsync(&result, N * sizeof(double), q)); + + for (auto v : values) { + HIP_CHECK(hipMemsetAsync(result, 0x00, N * sizeof(double), q)); + + sinKernel<<<1, 1, 0, q>>>(result + 0, static_cast(v)); + HIP_CHECK(hipGetLastError()); + sinKernel<<<1, 1, 0, q>>>(result + 1, static_cast(v)); + HIP_CHECK(hipGetLastError()); + sinKernel<<<1, 1, 0, q>>>(result + 2, static_cast(v)); + HIP_CHECK(hipGetLastError()); + sinfKernel<<<1, 1, 0, q>>>(result + 3, static_cast(v)); + HIP_CHECK(hipGetLastError()); + sinfKernel<<<1, 1, 0, q>>>(result + 4, static_cast(v)); + HIP_CHECK(hipGetLastError()); + sinfKernel<<<1, 1, 0, q>>>(result + 5, static_cast(v)); + HIP_CHECK(hipGetLastError()); + + double resultHost[N]; + HIP_CHECK(hipMemcpyAsync(resultHost, result, N * sizeof(double), hipMemcpyDeviceToHost, q)); + HIP_CHECK(hipStreamSynchronize(q)); + + auto const epsilon = std::numeric_limits::epsilon(); + auto const epsilon_f = std::numeric_limits::epsilon(); + REQUIRE_THAT(resultHost[0], Catch::Matchers::WithinAbs(std::sin(static_cast(v)), epsilon)); + REQUIRE_THAT(resultHost[1], 
Catch::Matchers::WithinAbs(std::sin(v), epsilon_f)); + REQUIRE_THAT(resultHost[2], Catch::Matchers::WithinAbs(std::sin(v), epsilon)); + REQUIRE_THAT(resultHost[3], Catch::Matchers::WithinAbs(sinf(static_cast(v)), epsilon_f)); + REQUIRE_THAT(resultHost[4], Catch::Matchers::WithinAbs(sinf(v), epsilon_f)); + REQUIRE_THAT(resultHost[5], Catch::Matchers::WithinAbs(sinf(v), epsilon_f)); + } + + HIP_CHECK(hipFreeAsync(result, q)); + HIP_CHECK(hipStreamDestroy(q)); +} diff --git a/test/sin/sin_t.sycl.cc b/test/sin/sin_t.sycl.cc new file mode 100644 index 0000000..4961aef --- /dev/null +++ b/test/sin/sin_t.sycl.cc @@ -0,0 +1,64 @@ +// C++ standard headers +#include +#include +#include + +// SYCL headers +#include + +// Catch2 headers +#define CATCH_CONFIG_MAIN +#include + +// xtd headers +#include "math.h" + +TEST_CASE("sinSYCL", "[sin]") { + try { + constexpr int N = 6; +#ifdef ONEAPI_CPU + auto queue = sycl::queue{sycl::cpu_selector_v, sycl::property::queue::in_order()}; +#else + if (sycl::device::get_devices(sycl::info::device_type::gpu).size() == 0) { + std::cout << "No SYCL GPUs found, the test will be skipped." 
<< std::endl; + exit(EXIT_SUCCESS); + } + auto queue = sycl::queue{sycl::gpu_selector_v, sycl::property::queue::in_order()}; +#endif + double *result = sycl::malloc_device(N, queue); + + std::vector values{-1., 0., M_PI / 2, M_PI, 42.}; + + for (auto v : values) { + queue.submit([&](sycl::handler &cgh) { + cgh.single_task([=]() { + result[0] = static_cast(xtd::sin(static_cast(v))); + result[1] = static_cast(xtd::sin(static_cast(v))); + result[2] = static_cast(xtd::sin(static_cast(v))); + result[3] = static_cast(xtd::sinf(static_cast(v))); + result[4] = static_cast(xtd::sinf(static_cast(v))); + result[5] = static_cast(xtd::sinf(static_cast(v))); + }); + }); + + double resultHost[N]; + queue.memcpy(resultHost, result, N * sizeof(double)); + queue.wait(); + + auto const epsilon = std::numeric_limits::epsilon(); + auto const epsilon_f = std::numeric_limits::epsilon(); + REQUIRE_THAT(resultHost[0], + Catch::Matchers::WithinAbs(std::sin(static_cast(v)), epsilon)); + REQUIRE_THAT(resultHost[1], Catch::Matchers::WithinAbs(std::sin(v), epsilon_f)); + REQUIRE_THAT(resultHost[2], Catch::Matchers::WithinAbs(std::sin(v), epsilon)); + REQUIRE_THAT(resultHost[3], Catch::Matchers::WithinAbs(sinf(static_cast(v)), epsilon_f)); + REQUIRE_THAT(resultHost[4], Catch::Matchers::WithinAbs(sinf(v), epsilon_f)); + REQUIRE_THAT(resultHost[5], Catch::Matchers::WithinAbs(sinf(v), epsilon_f)); + } + sycl::free(result, queue); + } catch (sycl::exception const &exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ + << std::endl; + exit(EXIT_FAILURE); + } +}