diff --git a/.github/workflows/cuda_ci.yml b/.github/workflows/cuda_ci.yml
new file mode 100644
index 00000000..176d969b
--- /dev/null
+++ b/.github/workflows/cuda_ci.yml
@@ -0,0 +1,57 @@
+# Compile-only CUDA check: installs the CUDA toolkit, builds and installs the
+# library from the repository root, then builds examples/cuda. The example
+# binary is built but never executed.
+name: cuda-compile
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  cuda-compile:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: Jimver/cuda-toolkit@v0.2.15
+        id: cuda-toolkit
+        with:
+          cuda: '12.4.1'
+
+      - name: Display CUDA version
+        run: |
+          echo "Installed CUDA version is: ${{ steps.cuda-toolkit.outputs.cuda }}"
+
+      - name: Display CUDA install location
+        run: |
+          echo "CUDA install location: ${{ steps.cuda-toolkit.outputs.CUDA_PATH }}"
+
+      - name: Check NVCC Version
+        run: |
+          nvcc -V
+
+      - name: Install dependencies (Linux no Ceres)
+        run: ./scripts/install_ubuntu_deps_no_ceres.sh
+
+      - name: Install test
+        run: |
+          echo "Install test"
+          mkdir build_dir
+          cd build_dir
+          # SOPHUS_ENABLE_ENSURE_HANDLER is not defined in this workflow, so
+          # fall back to OFF instead of passing an empty value.
+          cmake -DCMAKE_CXX_COMPILER_LAUNCHER=ccache .. -DBUILD_SOPHUS_TESTS=Off -DCMAKE_COMPILE_WARNING_AS_ERROR=On -DSOPHUS_ENABLE_ENSURE_HANDLER=${SOPHUS_ENABLE_ENSURE_HANDLER:-OFF}
+          # Ubuntu builds via Github actions run on 2-core virtual machines
+          make -j2
+          sudo make install
+          cd ..
+          cd examples/cuda
+          mkdir build_dir
+          cd build_dir
+          cmake ..
+          make
+          ls -la
diff --git a/Sophus.code-workspace b/Sophus.code-workspace
index b3aa3987..3d2a2e8a 100644
--- a/Sophus.code-workspace
+++ b/Sophus.code-workspace
@@ -64,7 +64,24 @@
 			"streambuf": "cpp",
 			"thread": "cpp",
 			"cinttypes": "cpp",
-			"typeinfo": "cpp"
+			"typeinfo": "cpp",
+			"bitset": "cpp",
+			"charconv": "cpp",
+			"condition_variable": "cpp",
+			"forward_list": "cpp",
+			"format": "cpp",
+			"mutex": "cpp",
+			"span": "cpp",
+			"variant": "cpp",
+			"__bit_reference": "cpp",
+			"__locale": "cpp",
+			"__threading_support": "cpp",
+			"__verbose_abort": "cpp",
+			"ios": "cpp",
+			"locale": "cpp",
+			"print": "cpp",
+			"queue": "cpp",
+			"stack": "cpp"
 		}
 	}
 }
diff --git a/examples/cuda/CMakeLists.txt b/examples/cuda/CMakeLists.txt
new file mode 100644
index 00000000..13d8e7df
--- /dev/null
+++ b/examples/cuda/CMakeLists.txt
@@ -0,0 +1,25 @@
+cmake_minimum_required(VERSION 3.24)
+
+project(CUDAVectorAdd)
+
+enable_language(CUDA) # Enable CUDA language support
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CUDA_STANDARD 14)
+set(CMAKE_CUDA_ARCHITECTURES
+    52
+    60
+    61
+    70
+    75
+    80
+    86)
+
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE RelWithDebInfo)
+endif()
+
+add_executable(kernel main.cpp kernel.cu)
+
+set_target_properties(kernel PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+target_link_libraries(kernel PRIVATE cuda)
diff --git a/examples/cuda/kernel.cu b/examples/cuda/kernel.cu
new file mode 100644
index 00000000..d42995eb
--- /dev/null
+++ b/examples/cuda/kernel.cu
@@ -0,0 +1,61 @@
+#include <cuda_runtime.h>
+
+#include <cstdlib>
+#include <iostream>
+
+// Evaluates a CUDA runtime call and aborts with file/line context on failure,
+// so an error cannot go unnoticed and leave the output buffer stale.
+#define CUDA_CHECK(call)                                                  \
+  do {                                                                    \
+    cudaError_t err_ = (call);                                            \
+    if (err_ != cudaSuccess) {                                            \
+      std::cerr << "CUDA error " << __FILE__ << ":" << __LINE__ << ": "   \
+                << cudaGetErrorString(err_) << "\n";                      \
+      std::abort();                                                       \
+    }                                                                     \
+  } while (0)
+
+// Element-wise sum: C[i] = A[i] + B[i] for 0 <= i < N.
+// Indexes with the x dimension only, one element per thread; threads whose
+// flat index is >= N do nothing, so the grid may overshoot N.
+__global__ void vecAddKernel(float* A, float* B, float* C, int N) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < N) {
+    C[i] = A[i] + B[i];
+  }
+}
+
+// Wrapper function for the CUDA kernel.
+// A, B and C are host buffers of N floats each: A and B are copied to the
+// device, summed by vecAddKernel, and the result is copied back into C.
+// Does nothing when N <= 0; aborts the process on any CUDA error.
+void cudaVecAdd(float* A, float* B, float* C, int N) {
+  if (N <= 0) {
+    // Zero blocks is not a valid launch configuration.
+    return;
+  }
+
+  float *d_A = nullptr, *d_B = nullptr, *d_C = nullptr;
+  size_t size = static_cast<size_t>(N) * sizeof(float);
+
+  CUDA_CHECK(cudaMalloc(&d_A, size));
+  CUDA_CHECK(cudaMalloc(&d_B, size));
+  CUDA_CHECK(cudaMalloc(&d_C, size));
+
+  CUDA_CHECK(cudaMemcpy(d_A, A, size, cudaMemcpyHostToDevice));
+  CUDA_CHECK(cudaMemcpy(d_B, B, size, cudaMemcpyHostToDevice));
+
+  int threadsPerBlock = 256;
+  // Ceil-division written so that it cannot overflow for N close to INT_MAX.
+  int blocksPerGrid = (N - 1) / threadsPerBlock + 1;
+  vecAddKernel<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, N);
+  // A launch returns no status; query launch-configuration errors explicitly.
+  CUDA_CHECK(cudaGetLastError());
+
+  // Blocking copy; errors raised while the kernel ran are reported here.
+  CUDA_CHECK(cudaMemcpy(C, d_C, size, cudaMemcpyDeviceToHost));
+
+  CUDA_CHECK(cudaFree(d_A));
+  CUDA_CHECK(cudaFree(d_B));
+  CUDA_CHECK(cudaFree(d_C));
+}
diff --git a/examples/cuda/main.cpp b/examples/cuda/main.cpp
new file mode 100644
index 00000000..efe92db8
--- /dev/null
+++ b/examples/cuda/main.cpp
@@ -0,0 +1,40 @@
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+
+// Defined in kernel.cu: writes A[i] + B[i] into C[i] for N host floats.
+extern void cudaVecAdd(float* A, float* B, float* C, int N);
+
+// Fills two vectors with sin^2(i) and cos^2(i), adds them through
+// cudaVecAdd and checks every element against the sum computed on the host.
+int main() {
+  int N = 1024;
+  std::vector<float> h_A(N, 0);
+  std::vector<float> h_B(N, 0);
+  std::vector<float> h_C(N, 0);
+
+  // Initialize vectors
+  for (int i = 0; i < N; ++i) {
+    h_A[i] = std::sin(i) * std::sin(i);
+    h_B[i] = std::cos(i) * std::cos(i);
+  }
+
+  // Call the CUDA kernel wrapper function
+  cudaVecAdd(h_A.data(), h_B.data(), h_C.data(), N);
+
+  // Check the result
+  for (int i = 0; i < N; ++i) {
+    float expected = h_A[i] + h_B[i];
+    // std::fabs keeps the difference in floating point; an unqualified abs()
+    // may resolve to abs(int) and truncate every error below 1.0 to zero.
+    // The negated <= also rejects NaN.
+    if (!(std::fabs(h_C[i] - expected) <= 1e-5f)) {
+      std::cerr << "Result verification failed at element " << i << "!\n";
+      return EXIT_FAILURE;
+    }
+  }
+
+  std::cout << "Test PASSED\n";
+  return EXIT_SUCCESS;
+}
diff --git a/scripts/install_ubuntu_deps_no_ceres.sh b/scripts/install_ubuntu_deps_no_ceres.sh
new file mode 100755
index 00000000..65a39d05
--- /dev/null
+++ b/scripts/install_ubuntu_deps_no_ceres.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Installs the Ubuntu build dependencies except Ceres: apt packages plus
+# Eigen checked out at a pinned commit and installed with `sudo make install`.
+#
+# Environment:
+#   ROW_MAJOR_DEFAULT  Value for Eigen's EIGEN_DEFAULT_TO_ROW_MAJOR option
+#                      (defaults to OFF when unset or empty).
+
+set -x # echo on
+set -e # exit on error
+
+cmake --version
+
+sudo apt update -y
+# -y: answer the confirmation prompt automatically for unattended runs.
+sudo apt install -y libc++-dev libgflags-dev libsuitesparse-dev clang
+
+git clone https://gitlab.com/libeigen/eigen.git
+cd eigen
+git checkout c1d637433e3b3f9012b226c2c9125c494b470ae6
+
+mkdir build-eigen
+cd build-eigen
+cmake .. -DEIGEN_DEFAULT_TO_ROW_MAJOR="${ROW_MAJOR_DEFAULT:-OFF}"
+sudo make install
+cd ../..