From cb12820682e27578ef9db3adb311c5801f1dc2fa Mon Sep 17 00:00:00 2001 From: Aaron Jomy <75925957+maximusron@users.noreply.github.com> Date: Sun, 17 Sep 2023 15:15:41 +0530 Subject: [PATCH] Add rich text to kalman-filter CUDA demo (#58) --- .../{kalman.hpp => kalman.h++} | 2 +- notebooks/kalman_CUDA_demo/run_kf.ipynb | 1203 ++++++++++++----- notebooks/kalman_CUDA_demo/run_kf_CUDA.ipynb | 928 ------------- 3 files changed, 835 insertions(+), 1298 deletions(-) rename notebooks/kalman_CUDA_demo/{kalman.hpp => kalman.h++} (96%) delete mode 100644 notebooks/kalman_CUDA_demo/run_kf_CUDA.ipynb diff --git a/notebooks/kalman_CUDA_demo/kalman.hpp b/notebooks/kalman_CUDA_demo/kalman.h++ similarity index 96% rename from notebooks/kalman_CUDA_demo/kalman.hpp rename to notebooks/kalman_CUDA_demo/kalman.h++ index 73faf6fa..0390ded7 100644 --- a/notebooks/kalman_CUDA_demo/kalman.hpp +++ b/notebooks/kalman_CUDA_demo/kalman.h++ @@ -42,7 +42,7 @@ class KalmanFilter { * Update the estimated state based on measured values. The * time step is assumed to remain constant. */ - void update(const std::vector& y); + std::vector update(const std::vector& y); /** * Update the estimated state based on measured values, diff --git a/notebooks/kalman_CUDA_demo/run_kf.ipynb b/notebooks/kalman_CUDA_demo/run_kf.ipynb index 020fd954..a31dab2c 100644 --- a/notebooks/kalman_CUDA_demo/run_kf.ipynb +++ b/notebooks/kalman_CUDA_demo/run_kf.ipynb @@ -1,107 +1,476 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "0aa31d74-5546-4990-9dd4-c44b552443a3", + "metadata": {}, + "source": [ + "## Running a 1 Dimensional Kalman Filter using std::vector and CUDA" + ] + }, { "cell_type": "code", "execution_count": 1, - "id": "afdccef0-6aaa-43d6-ac29-7ea950356e19", + "id": "9f1f08b0-9d14-46c0-af75-4ea72af2d350", "metadata": {}, "outputs": [], "source": [ + "#include \n", + "#include \n", + "#include \n", + "#include \n", "#include \"kalman.hpp\"" ] }, + { + "cell_type": "markdown", + "id": "dc67046f-5dcb-48c4-844b-b7f248cc8cea", + "metadata": {}, + "source": [ + "### Defining CUDA Kernel functions for parallel matrix and vector operations:" + ] + }, { "cell_type": "code", "execution_count": 2, - "id": "6feb6a99-9580-4bf7-aa48-0a7d175494a4", + "id": "ebcc8af4-32b2-418b-bfbe-9ffb4ff9b0ff", "metadata": {}, "outputs": [], "source": [ - "#include \n", - "#include \n", - "#include \n" + "__global__ void matAddKernel(double* a, double* b, double* c, int rows, int cols) {\n", + " int col = blockIdx.x * blockDim.x + threadIdx.x;\n", + " int row = blockIdx.y * blockDim.y + threadIdx.y;\n", + "\n", + " if (col < cols && row < rows) {\n", + " int idx = row * cols + col;\n", + " c[idx] = a[idx] + b[idx];\n", + " }\n", + "}" ] }, { "cell_type": "code", "execution_count": 3, - "id": "ebcc8af4-32b2-418b-bfbe-9ffb4ff9b0ff", + "id": "3f7f95d9-2f3d-4600-8896-0ed4dcca1ed6", + "metadata": {}, + "outputs": [], + "source": [ + "__global__ void matSubKernel(double* a, double* b, double* c, int rows, int cols) {\n", + " int col = blockIdx.x * blockDim.x + threadIdx.x;\n", + " int row = blockIdx.y * blockDim.y + threadIdx.y;\n", + "\n", + " if (col < cols && row < rows) {\n", + " int idx = row * cols + col;\n", + " c[idx] = a[idx] - b[idx];\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "00183ab5-0380-4773-a34c-053fef7782f3", + "metadata": {}, + "outputs": [], + "source": [ + "__global__ void matTransposeKernel(double* a, double* c, int rows, int cols) {\n", + " int col = blockIdx.x * blockDim.x + threadIdx.x;\n", + " int 
row = blockIdx.y * blockDim.y + threadIdx.y;\n", + "\n", + " if (col < cols && row < rows) {\n", + " int idx_in = row * cols + col;\n", + " int idx_out = col * rows + row;\n", + " c[idx_out] = a[idx_in];\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d0eff8fe-34a5-4370-b6ff-118cd4d7cb22", + "metadata": {}, + "outputs": [], + "source": [ + "__global__ void matvecmulKernel(double* d_mat, double* d_vec, double* d_result, int rows, int cols) {\n", + " int tid = blockIdx.x * blockDim.x + threadIdx.x;\n", + "\n", + " if (tid < rows) {\n", + " double sum = 0.0;\n", + " for (int j = 0; j < cols; j++) {\n", + " sum += d_mat[tid * cols + j] * d_vec[j];\n", + " }\n", + " d_result[tid] = sum;\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "21718df0-5f56-4122-bc1a-a368b306fdaf", + "metadata": {}, + "outputs": [], + "source": [ + "__global__ void matmulKernel(double* d_a, double* d_b, double* d_result, int rowsA, int colsA, int colsB) {\n", + " int row = blockIdx.y * blockDim.y + threadIdx.y;\n", + " int col = blockIdx.x * blockDim.x + threadIdx.x;\n", + "\n", + " if (row < rowsA && col < colsB) {\n", + " double value = 0.0;\n", + " for (int k = 0; k < colsA; k++) {\n", + " value += d_a[row * colsA + k] * d_b[k * colsB + col];\n", + " }\n", + " d_result[row * colsB + col] = value;\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "18f843c9-9aee-4055-aae7-473c8f121037", + "metadata": {}, + "outputs": [], + "source": [ + "__global__ void vecsubKernel(const double* a, const double* b, double* result, int len) {\n", + " int idx = blockIdx.x * blockDim.x + threadIdx.x;\n", + " if (idx < len) {\n", + " result[idx] = a[idx] - b[idx];\n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "d8bb436d-238b-488f-a8a8-b8e1a4a0150d", + "metadata": {}, + "source": [ + "### Defining functions to run CUDA kernels and obtain the results:\n", + "This allows us to run calls to all matrix-matrix, matrix-vector and vector-vector CUDA operations without dealing with CUDA syntax" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6ff06da4-1430-4720-affb-b9c8b1dc7a26", "metadata": {}, "outputs": [], "source": [ - "// helper function: matrix addition\n", - "std::vector> matadd(const std::vector>& a, const std::vector>& b) {\n", + "std::vector> mataddCUDA(const std::vector>& a, const std::vector>& b) {\n", " int rows = a.size();\n", " int cols = a[0].size();\n", " \n", - " std::vector> result(rows, std::vector(cols));\n", + " double* h_a = new double[rows*cols];\n", + " double* h_b = new double[rows*cols];\n", + " double* h_c = new double[rows*cols];\n", + " \n", + " for (int i = 0; i < rows; i++) {\n", + " for (int j = 0; j < cols; j++) {\n", + " h_a[i*cols + j] = a[i][j];\n", + " h_b[i*cols + j] = b[i][j];\n", + " }\n", + " }\n", + " \n", + " double* d_a, * d_b, * d_c;\n", + " cudaMalloc((void**)&d_a, rows*cols*sizeof(double));\n", + " cudaMalloc((void**)&d_b, rows*cols*sizeof(double));\n", + " cudaMalloc((void**)&d_c, rows*cols*sizeof(double));\n", + "\n", + " cudaMemcpy(d_a, h_a, rows*cols*sizeof(double), cudaMemcpyHostToDevice);\n", + " cudaMemcpy(d_b, h_b, rows*cols*sizeof(double), cudaMemcpyHostToDevice);\n", "\n", + " dim3 dimBlock(16, 16);\n", + " dim3 dimGrid((cols + dimBlock.x - 1) / dimBlock.x, (rows + dimBlock.y - 1) / dimBlock.y);\n", + " matAddKernel<<>>(d_a, d_b, d_c, rows, cols);\n", + "\n", + " cudaMemcpy(h_c, d_c, rows*cols*sizeof(double), 
cudaMemcpyDeviceToHost);\n", + "\n", + " cudaFree(d_a);\n", + " cudaFree(d_b);\n", + " cudaFree(d_c);\n", + "\n", + " std::vector> result(rows, std::vector(cols));\n", " for (int i = 0; i < rows; i++) {\n", " for (int j = 0; j < cols; j++) {\n", - " result[i][j] = a[i][j] + b[i][j];\n", + " result[i][j] = h_c[i*cols + j];\n", " }\n", " }\n", "\n", + " delete[] h_a;\n", + " delete[] h_b;\n", + " delete[] h_c;\n", + "\n", " return result;\n", "}" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 9, "id": "b70f7f32-b7c2-4a1c-96a0-aea42b15f2aa", "metadata": {}, "outputs": [], "source": [ - "// helper function: matrix subtraction\n", - "std::vector> matsub(const std::vector>& a, const std::vector>& b) {\n", + "std::vector> matsubCUDA(const std::vector>& a, const std::vector>& b) {\n", " int rows = a.size();\n", " int cols = a[0].size();\n", " \n", - " std::vector> result(rows, std::vector(cols));\n", + " double* h_a = new double[rows*cols];\n", + " double* h_b = new double[rows*cols];\n", + " double* h_c = new double[rows*cols];\n", + " \n", + " for (int i = 0; i < rows; i++) {\n", + " for (int j = 0; j < cols; j++) {\n", + " h_a[i*cols + j] = a[i][j];\n", + " h_b[i*cols + j] = b[i][j];\n", + " }\n", + " }\n", + "\n", + " double* d_a, * d_b, * d_c;\n", + " cudaMalloc((void**)&d_a, rows*cols*sizeof(double));\n", + " cudaMalloc((void**)&d_b, rows*cols*sizeof(double));\n", + " cudaMalloc((void**)&d_c, rows*cols*sizeof(double));\n", + "\n", + " cudaMemcpy(d_a, h_a, rows*cols*sizeof(double), cudaMemcpyHostToDevice);\n", + " cudaMemcpy(d_b, h_b, rows*cols*sizeof(double), cudaMemcpyHostToDevice);\n", "\n", + " dim3 dimBlock(16, 16); \n", + " dim3 dimGrid((cols + dimBlock.x - 1) / dimBlock.x, (rows + dimBlock.y - 1) / dimBlock.y);\n", + " matSubKernel<<>>(d_a, d_b, d_c, rows, cols);\n", + "\n", + " cudaMemcpy(h_c, d_c, rows*cols*sizeof(double), cudaMemcpyDeviceToHost);\n", + "\n", + " cudaFree(d_a);\n", + " cudaFree(d_b);\n", + " cudaFree(d_c);\n", + "\n", + " std::vector> result(rows, std::vector(cols));\n", " for (int i = 0; i < rows; i++) {\n", " for (int j = 0; j < cols; j++) {\n", - " result[i][j] = a[i][j] - b[i][j];\n", + " result[i][j] = h_c[i*cols + j];\n", " }\n", " }\n", "\n", + " delete[] h_a;\n", + " delete[] h_b;\n", + " delete[] h_c;\n", + "\n", " return result;\n", "}" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "id": "86b4a452-f04c-45a4-b69b-d87bcb2e5954", "metadata": {}, "outputs": [], "source": [ - "// helper function: matrix transpose\n", - "std::vector> mattranspose(const std::vector>& a) {\n", + "std::vector> mattransposeCUDA(const std::vector>& a) {\n", " int rows = a.size();\n", " int cols = a[0].size();\n", " \n", + " double* h_a = new double[rows*cols];\n", + " double* h_c = new double[cols*rows];\n", + " \n", + " for (int i = 0; i < rows; i++) {\n", + " for (int j = 0; j < cols; j++) {\n", + " h_a[i*cols + j] = a[i][j];\n", + " }\n", + " }\n", + "\n", + " double* d_a, * d_c;\n", + " cudaMalloc((void**)&d_a, rows*cols*sizeof(double));\n", + " cudaMalloc((void**)&d_c, cols*rows*sizeof(double));\n", + "\n", + " cudaMemcpy(d_a, h_a, rows*cols*sizeof(double), cudaMemcpyHostToDevice);\n", + "\n", + " dim3 dimBlock(16, 16);\n", + " dim3 dimGrid((cols + dimBlock.x - 1) / dimBlock.x, (rows + dimBlock.y - 1) / dimBlock.y);\n", + " matTransposeKernel<<>>(d_a, d_c, rows, cols);\n", + "\n", + " cudaMemcpy(h_c, d_c, cols*rows*sizeof(double), cudaMemcpyDeviceToHost);\n", + "\n", + " cudaFree(d_a);\n", + " cudaFree(d_c);\n", + "\n", " 
std::vector> result(cols, std::vector(rows));\n", + " for (int i = 0; i < cols; i++) {\n", + " for (int j = 0; j < rows; j++) {\n", + " result[i][j] = h_c[i*rows + j];\n", + " }\n", + " }\n", + "\n", + " delete[] h_a;\n", + " delete[] h_c;\n", + "\n", + " return result;\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "3a43038a-89d4-4d90-9b2c-bc3bf87ca9ab", + "metadata": {}, + "source": [ + "### Function to obtain 2x2 and 3x3 matrix inverse using adjoint:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0d925dce-377c-4d23-96d6-9c4fead297cb", + "metadata": {}, + "outputs": [], + "source": [ + "std::vector matvecmulCUDA(const std::vector>& a, const std::vector& b) {\n", + " int rows = a.size();\n", + " int cols = a[0].size();\n", "\n", + " if (cols != b.size()) {\n", + " throw std::runtime_error(\"Matrix dims do not match for multiplication.\");\n", + " }\n", + "\n", + " std::vector h_mat(rows * cols);\n", " for (int i = 0; i < rows; i++) {\n", " for (int j = 0; j < cols; j++) {\n", - " result[j][i] = a[i][j];\n", + " h_mat[i * cols + j] = a[i][j];\n", " }\n", " }\n", "\n", + " double *d_mat, *d_vec, *d_result;\n", + " cudaMalloc((void**)&d_mat, rows * cols * sizeof(double));\n", + " cudaMalloc((void**)&d_vec, cols * sizeof(double));\n", + " cudaMalloc((void**)&d_result, rows * sizeof(double));\n", + "\n", + " cudaMemcpy(d_mat, h_mat.data(), rows * cols * sizeof(double), cudaMemcpyHostToDevice);\n", + " cudaMemcpy(d_vec, b.data(), cols * sizeof(double), cudaMemcpyHostToDevice);\n", + "\n", + " int blockSize = 256;\n", + " int gridSize = (rows + blockSize - 1) / blockSize;\n", + "\n", + " matvecmulKernel<<>>(d_mat, d_vec, d_result, rows, cols);\n", + "\n", + " std::vector result(rows);\n", + " cudaMemcpy(result.data(), d_result, rows * sizeof(double), cudaMemcpyDeviceToHost);\n", + "\n", + " cudaFree(d_mat);\n", + " cudaFree(d_vec);\n", + " cudaFree(d_result);\n", + "\n", + " return result;\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "69714460-9a3e-4966-9de7-5e4111fc40af", + "metadata": {}, + "outputs": [], + "source": [ + "std::vector> matmulCUDA(const std::vector>& a, const std::vector>& b) {\n", + " int rowsA = a.size();\n", + " int colsA = a[0].size();\n", + " int rowsB = b.size();\n", + " int colsB = b[0].size();\n", + "\n", + " if (colsA != rowsB) {\n", + " throw std::runtime_error(\"Matrix dims do not match for multiplication.\");\n", + " }\n", + "\n", + " std::vector h_a(rowsA * colsA);\n", + " std::vector h_b(rowsB * colsB);\n", + "\n", + " for (int i = 0; i < rowsA; i++) {\n", + " for (int j = 0; j < colsA; j++) {\n", + " h_a[i * colsA + j] = a[i][j];\n", + " }\n", + " }\n", + "\n", + " for (int i = 0; i < rowsB; i++) {\n", + " for (int j = 0; j < colsB; j++) {\n", + " h_b[i * colsB + j] = b[i][j];\n", + " }\n", + " }\n", + "\n", + " double *d_a, *d_b, *d_result;\n", + " \n", + " cudaMalloc((void**)&d_a, rowsA * colsA * sizeof(double));\n", + " cudaMalloc((void**)&d_b, rowsB * colsB * sizeof(double));\n", + " cudaMalloc((void**)&d_result, rowsA * colsB * sizeof(double));\n", + "\n", + " cudaMemcpy(d_a, h_a.data(), rowsA * colsA * sizeof(double), cudaMemcpyHostToDevice);\n", + " cudaMemcpy(d_b, h_b.data(), rowsB * colsB * sizeof(double), cudaMemcpyHostToDevice);\n", + "\n", + " dim3 blockSize(16, 16);\n", + " dim3 gridSize((colsB + blockSize.x - 1) / blockSize.x, (rowsA + blockSize.y - 1) / blockSize.y);\n", + "\n", + " matmulKernel<<>>(d_a, d_b, d_result, rowsA, colsA, colsB);\n", + "\n", + " std::vector 
h_result(rowsA * colsB);\n", + " cudaMemcpy(h_result.data(), d_result, rowsA * colsB * sizeof(double), cudaMemcpyDeviceToHost);\n", + "\n", + " cudaFree(d_a);\n", + " cudaFree(d_b);\n", + " cudaFree(d_result);\n", + " \n", + " std::vector> result(rowsA, std::vector(colsB));\n", + " for (int i = 0; i < rowsA; i++) {\n", + " for (int j = 0; j < colsB; j++) {\n", + " result[i][j] = h_result[i * colsB + j];\n", + " }\n", + " }\n", + "\n", + " return result;\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "7295e929-dcfc-43be-9b3b-82e9f9fbf9db", + "metadata": {}, + "outputs": [], + "source": [ + "std::vector vecsubCUDA(const std::vector& a, const std::vector& b) {\n", + " int len = a.size();\n", + "\n", + " double* d_a;\n", + " double* d_b;\n", + " double* d_result;\n", + "\n", + " cudaMalloc((void**)&d_a, len * sizeof(double));\n", + " cudaMalloc((void**)&d_b, len * sizeof(double));\n", + " cudaMalloc((void**)&d_result, len * sizeof(double));\n", + "\n", + " cudaMemcpy(d_a, a.data(), len * sizeof(double), cudaMemcpyHostToDevice);\n", + " cudaMemcpy(d_b, b.data(), len * sizeof(double), cudaMemcpyHostToDevice);\n", + "\n", + " int blockSize = 256; \n", + " int gridSize = (len + blockSize - 1) / blockSize;\n", + "\n", + " vecsubKernel<<>>(d_a, d_b, d_result, len);\n", + "\n", + " std::vector result(len);\n", + " cudaMemcpy(result.data(), d_result, len * sizeof(double), cudaMemcpyDeviceToHost);\n", + "\n", + " cudaFree(d_a);\n", + " cudaFree(d_b);\n", + " cudaFree(d_result);\n", + "\n", " return result;\n", "}\n" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "bf413652-f71a-4b91-9e6e-5520812dc3cb", + "execution_count": 14, + "id": "cb968a24-75c4-491f-9930-0f16a48f7560", "metadata": {}, "outputs": [], "source": [ - "// helper function: matrix inversion\n", "std::vector> matinverse(const std::vector>& a) {\n", " size_t n = a.size();\n", " \n", @@ -113,7 +482,7 @@ " // Handle 1x1 matrix\n", " if (n == 1) {\n", " if (a[0][0] == 0) {\n", - " throw std::runtime_error(\"Matrix is singular and cannot be inverted.\");\n", + " throw std::runtime_error(\"singular matrix\");\n", " }\n", " return {{1.0 / a[0][0]}};\n", " }\n", @@ -121,7 +490,7 @@ " if (n == 2) {\n", " double determinant = a[0][0] * a[1][1] - a[0][1] * a[1][0];\n", " if (determinant == 0) {\n", - " throw std::runtime_error(\"Matrix is singular and cannot be inverted.\");\n", + " throw std::runtime_error(\"singular matrix\");\n", " }\n", "\n", " std::vector> result(2, std::vector(2));\n", @@ -139,7 +508,7 @@ " + a[0][2]*(a[1][0]*a[2][1]-a[1][1]*a[2][0]);\n", " \n", " if (determinant == 0) {\n", - " throw std::runtime_error(\"Matrix is singular and cannot be inverted.\");\n", + " throw std::runtime_error(\"singular matrix\");\n", " }\n", "\n", " std::vector> result(3, std::vector(3));\n", @@ -157,118 +526,117 @@ " return result;\n", " }\n", "\n", - " throw std::runtime_error(\"Only 2x2 and 3x3 matrices supported for inversion in this function.\");\n", - "}\n", - "\n" + " throw std::runtime_error(\"Only 2x2 and 3x3 matrices supported for inversion\");\n", + "}\n" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "8c4b2db6-0847-4c0f-a8f7-88339d8257d7", + "execution_count": 15, + "id": "84f29c1e-d4c2-4680-a579-14d63a2bde08", "metadata": {}, "outputs": [], "source": [ - "// helper function: matrix-vector subtraction\n", - "std::vector> matvecsub(const std::vector>& mat, const std::vector& vec) {\n", - " std::vector> result(mat.size(), std::vector(vec.size()));\n", - "\n", - " for (size_t i = 0; i < 
mat.size(); i++) {\n", - " for (size_t j = 0; j < vec.size(); j++) {\n", - " result[i][j] = mat[i][j] - vec[j];\n", + "void printMatrix(const std::vector>& matrix) {\n", + " for (size_t i = 0; i < matrix.size(); i++) {\n", + " for (size_t j = 0; j < matrix[i].size(); j++) {\n", + " std::cout << matrix[i][j] << \" \";\n", " }\n", + " std::cout << std::endl;\n", " }\n", - "\n", - " return result;\n", "}" ] }, { "cell_type": "code", - "execution_count": 8, - "id": "0d925dce-377c-4d23-96d6-9c4fead297cb", + "execution_count": 16, + "id": "fa143e89-d1c5-4b54-b22d-9921d7e1000d", "metadata": {}, "outputs": [], "source": [ - "// helper function: matrix-vector multiplication\n", - "std::vector matvecmul(const std::vector>& a, const std::vector& b) {\n", - " int rows = a.size();\n", - " int cols = a[0].size();\n", - "\n", - " if (cols != b.size()) {\n", - " throw std::runtime_error(\"Matrix dimensions do not match for multiplication.\");\n", - " }\n", - "\n", - " std::vector result(rows, 0.0);\n", - "\n", - " for (int i = 0; i < rows; i++) {\n", - " for (int j = 0; j < cols; j++) {\n", - " result[i] += a[i][j] * b[j];\n", + "void printMatrix(const std::vector& vec) {\n", + " for (size_t i = 0; i < vec.size(); i++) {\n", + " std::cout << vec[i] << \" \";\n", " }\n", - " }\n", - "\n", - " return result;\n", - "}" + " std::cout << std::endl;\n", + " }\n" + ] + }, + { + "cell_type": "markdown", + "id": "79ebaf27-693f-4213-800d-8e8b5c9abaa3", + "metadata": {}, + "source": [ + "### Test for matrix-matrix multiplication using CUDA" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "69714460-9a3e-4966-9de7-5e4111fc40af", + "execution_count": 17, + "id": "6d718c47-3bc6-4c7a-893c-4fedc399926d", "metadata": {}, "outputs": [], "source": [ - "// helper function: matrix-matrix multiplication\n", - "std::vector> matmul(const std::vector>& a, const std::vector>& b) {\n", - " int rowsA = a.size();\n", - " int colsA = a[0].size();\n", - " int rowsB = b.size();\n", - " int colsB = b[0].size();\n", - "\n", - " if (colsA != rowsB) {\n", - " throw std::runtime_error(\"Matrix dimensions do not match for multiplication.\");\n", - " }\n", + "std::vector> matrixA = {\n", + " {1.0, 2.0},\n", + " {3.0, 4.0},\n", + " {5.0, 6.0}\n", + " };\n", "\n", - " std::vector> result(rowsA, std::vector(colsB, 0));\n", + "std::vector> matrixB = {\n", + " {1.0, 2.0, 3.0, 4.0},\n", + " {5.0, 6.0, 7.0, 8.0}\n", + " };\n", "\n", - " for (int i = 0; i < rowsA; i++) {\n", - " for (int j = 0; j < colsB; j++) {\n", - " for (int k = 0; k < colsA; k++) {\n", - " result[i][j] += a[i][k] * b[k][j];\n", - " }\n", - " }\n", - " }\n", - "\n", - " return result;\n", - "}" + "std::vector> result = matmulCUDA(matrixA, matrixB);" ] }, { "cell_type": "code", - "execution_count": 10, - "id": "7295e929-dcfc-43be-9b3b-82e9f9fbf9db", + "execution_count": 18, + "id": "3e1c925e-c164-46af-8e0f-65c05f0ee1c0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result matrix:\n", + "11 14 17 20 \n", + "23 30 37 44 \n", + "35 46 57 68 \n" + ] + } + ], "source": [ - "// helper function: vector subtraction\n", - "std::vector vecsub(const std::vector& a, const std::vector& b) {\n", - " size_t len = a.size();\n", - " std::vector result(len);\n", - " for (size_t i = 0; i < len; i++) {\n", - " result[i] = a[i] - b[i];\n", - " }\n", - " return result;\n", - "}" + "std::cout << \"Result matrix:\" << std::endl;\n", + "printMatrix(result);" + ] + }, + { + "cell_type": "markdown", + "id": 
"9312a581-d000-4da6-b257-b22a7da942f6", + "metadata": {}, + "source": [ + "### Set up the `KalmanFilter` class and constructor\n", + "\n", + " Here we create a Kalman filter with the specified matrices:\n", + " \n", + " - A - System dynamics matrix, here we model 1D motion with a *(Position x, Velocity v, Acceleration a)* model\n", + " - C - Output matrix\n", + " - Q - Covariance matrix of the process noise random variable *p(w) ~ N(0, Q)*\n", + " - R - Covariance matrix of the measurement noise random variable *p(v) ~ N(0, R)*\n", + " - P - Estimate error covariance\n", + " " ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 19, "id": "f4c9238f-674a-4cf8-8128-840cc7f61e0f", "metadata": {}, "outputs": [], "source": [ - "//set up class and constructor\n", "KalmanFilter::KalmanFilter(\n", " double dt,\n", " const std::vector>& A,\n", @@ -288,76 +656,121 @@ "KalmanFilter::KalmanFilter() {}" ] }, + { + "cell_type": "markdown", + "id": "cb0ecf07-b676-4a2f-bb91-4d0ebfe4fc17", + "metadata": {}, + "source": [ + "#### Describe the `KalmanFilter::init` function to initialize the filter with initial states as zero." + ] + }, { "cell_type": "code", - "execution_count": 12, - "id": "bd953ec6-ceed-489c-8e33-8eb7ae6bb32f", + "execution_count": 20, + "id": "ed99dd28-5959-409a-8226-7e292bc4b0de", "metadata": {}, "outputs": [], "source": [ - "// init the kf states + measurements \n", - "void KalmanFilter::init(double t0, const std::vector& x0) {\n", - " x_hat = x0;\n", + "void KalmanFilter::init() {\n", + " std::fill(x_hat.begin(), x_hat.end(), 0.0);\n", " P = P0;\n", - " this->t0 = t0;\n", + " t0 = 0;\n", " t = t0;\n", " initialized = true;\n", "}" ] }, + { + "cell_type": "markdown", + "id": "f4291823-8a85-40bf-baf7-c0686388e435", + "metadata": {}, + "source": [ + "#### Describe the `KalmanFilter::init` function to initialize the filter with a guess for initial states." 
+   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
-   "id": "2840f0f7-52de-4f47-a9dc-e257235df002",
+   "execution_count": 21,
+   "id": "bd953ec6-ceed-489c-8e33-8eb7ae6bb32f",
   "metadata": {},
   "outputs": [],
   "source": [
-    "void KalmanFilter::init() {\n",
-    "    std::fill(x_hat.begin(), x_hat.end(), 0.0);\n",
+    "void KalmanFilter::init(double t0, const std::vector<double>& x0) {\n",
+    "    x_hat = x0;\n",
     "    P = P0;\n",
-    "    t0 = 0;\n",
+    "    this->t0 = t0;\n",
     "    t = t0;\n",
     "    initialized = true;\n",
     "}"
   ]
  },
+ {
+   "cell_type": "markdown",
+   "id": "3b71922a-0448-4fa0-8196-08142c2f64b8",
+   "metadata": {},
+   "source": [
+    "#### Running the Kalman Filter Algorithm:\n",
+    "\n",
+    "The `update` function runs the two stages of the ongoing discrete Kalman filter cycle:\n",
+    "\n",
+    " - Discrete Kalman filter time update equations:\n",
+    " \n",
+    "   $\begin{aligned} & \hat{x}_k^{-}=A \hat{x}_{k-1} \\ & P_k^{-}=A P_{k-1} A^T+Q\end{aligned}$\n",
+    " \n",
+    "   They project the state and covariance estimates forward from time step *k* - 1 to step *k*.\n",
+    " \n",
+    " - Discrete Kalman filter measurement update equations:\n",
+    "\n",
+    "   $\begin{gathered}K_k=P_k^{-} H^T\left(H P_k^{-} H^T+R\right)^{-1} \\ \hat{x}_k=\hat{x}_k^{-}+K_k\left(z_k-H \hat{x}_k^{-}\right) \\ P_k=\left(I-K_k H\right) P_k^{-}\end{gathered}$\n",
+    " \n",
+    "   The Kalman gain $K_k$ is computed in this step; $H$ and $z_k$ above correspond to `C` and `y` in the code."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 22,
   "id": "ea057f20-9097-4dd3-946e-ab7a383afe69",
   "metadata": {},
   "outputs": [],
   "source": [
-    "\n",
-    "\n",
-    "void KalmanFilter::update(const std::vector<double>& y) {\n",
+    "std::vector<double> KalmanFilter::update(const std::vector<double>& y) {\n",
     "    if (!initialized)\n",
     "        throw std::runtime_error(\"Filter is not initialized!\");\n",
-    "\n",
-    "    x_hat_new = matvecmul(A, x_hat);\n",
-    "\n",
-    "    P = matadd(matmul(matmul(A, P), mattranspose(A)), Q);\n",
-    "\n",
-    "    std::vector<std::vector<double>> inv = matinverse(matadd(matmul(matmul(C, P), mattranspose(C)), R));\n",
-    "\n",
-    "    K = matmul(matmul(P, mattranspose(C)), inv);\n",
-    "    std::vector<double> temp = matvecmul(C, x_hat_new);\n",
-    "    std::vector<double> difference = vecsub(y, temp);\n",
-    "\n",
+    "    \n",
+    "    // Discrete Kalman filter time update\n",
+    "    x_hat_new = matvecmulCUDA(A, x_hat);\n",
+    "    P = mataddCUDA(matmulCUDA(matmulCUDA(A, P), mattransposeCUDA(A)), Q);\n",
+    "    \n",
+    "    // Discrete Kalman filter measurement update\n",
+    "    std::vector<std::vector<double>> inv = matinverse(mataddCUDA(matmulCUDA(matmulCUDA(C, P), mattransposeCUDA(C)), R));\n",
+    "    K = matmulCUDA(matmulCUDA(P, mattransposeCUDA(C)), inv);\n",
+    "    std::vector<double> temp = matvecmulCUDA(C, x_hat_new);\n",
+    "    std::vector<double> difference = vecsubCUDA(y, temp);\n",
+    "    std::vector<double> gain = K[0];\n",
     "    for (size_t i = 0; i < x_hat_new.size(); i++) {\n",
-    "        x_hat_new[i] += matvecmul(K, difference)[i];\n",
+    "        x_hat_new[i] += matvecmulCUDA(K, difference)[i];\n",
     "    }\n",
     "\n",
-    "    P = matmul(matsub(I, matmul(K, C)), P);\n",
+    "    P = matmulCUDA(matsubCUDA(I, matmulCUDA(K, C)), P);\n",
     "\n",
     "    x_hat = x_hat_new;\n",
     "    t += dt;\n",
+    "    \n",
+    "    return gain;\n",
     "}\n"
   ]
  },
+ {
+   "cell_type": "markdown",
+   "id": "2b1d8963-599a-4034-80de-e7c92c59c321",
+   "metadata": {},
+   "source": [
+    "#### Update the estimated state based on measured values, using the given time step and dynamics matrix:"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 23,
   "id": "08d28163-a086-46a9-bf41-044f48530929",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -370,41 +783,41 @@
   ]
  },
  {
-   "cell_type": "code",
-   "execution_count": 16,
-
"id": "80d35cbe-7baa-4bc2-a232-4b5631af59b2", + "cell_type": "markdown", + "id": "54fb13b2-51ca-4dda-8503-151aea8dbd68", "metadata": {}, - "outputs": [], "source": [ - "std::vector generateExpandedData(const std::vector& baseData, int repetitions) {\n", - " std::vector expandedData;\n", - " for (int r = 0; r < repetitions; r++) {\n", - " for (auto val : baseData) {\n", - " // Add slight random perturbation to the data.\n", - " double perturbedValue = val + ((double)rand() / RAND_MAX - 0.5);\n", - " expandedData.push_back(perturbedValue);\n", - " }\n", - " }\n", - " return expandedData;\n", - "}" + "### Putting it all together:\n", + "We define the `run_kf` function that initialises the following:\n", + "- Number of states\n", + "- Dimensions of eacch measurement\n", + "- Time steps used $\\begin{aligned} & {dt}\\end{aligned}$\n", + "- Covariance matrices\n", + "- Passes the measurement data to $\\begin{aligned} & \\hat{x}_0^{-}\\end{aligned}$\n", + "\n", + "Then the measurements are fed into filter, and output estimated states\n", + "\n", + "The algorithm then runs by looping over the measurements:\n", + "\n", + "$\\begin{gathered} & for\\:y\\:in\\:measurements\\::\\end{gathered}$\n", + "$\\begin{gathered} & kf.update(y)\\end{gathered}$" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 24, "id": "8938a288-fc2c-4104-82c6-added4466c8c", "metadata": {}, "outputs": [], "source": [ - "std::vector> run_kf(int expansion_factor, bool verbose) {\n", + "std::vector> run_kf(bool verbose) {\n", " \n", - " int n = 3; // Number of states\n", - " int m = 1; // Number of measurements\n", + " int n = 3; \n", + " int m = 1; \n", "\n", - " double dt = 1.0 / 40; // Time step\n", + " double dt = 1.0 / 30; // Time step\n", " \n", " std::vector g_preds;\n", - " std::vector g_est;\n", " \n", " std::vector> A(n, std::vector(n));\n", " std::vector> C(m, std::vector(n));\n", @@ -421,47 +834,58 @@ " KalmanFilter kf(dt, A, C, Q, R, P);\n", " \n", " std::vector measurements = {\n", - " 1.04202710058, 1.10726790452, 1.2913511148, 1.48485250951, 1.72825901034,\n", - " 1.74216489744, 2.11672039768, 2.14529225112, 2.16029641405, 2.21269371128,\n", - " 2.57709350237, 2.6682215744, 2.51641839428, 2.76034056782, 2.88131780617,\n", - " 2.88373786518, 2.9448468727, 2.82866600131, 3.0006601946, 3.12920591669,\n", - " 2.858361783, 2.83808170354, 2.68975330958, 2.66533185589, 2.81613499531,\n", - " 2.81003612051, 2.88321849354, 2.69789264832, 2.4342229249, 2.23464791825,\n", - " 2.30278776224, 2.02069770395, 1.94393985809, 1.82498398739, 1.52526230354,\n", - " 1.86967808173, 1.18073207847, 1.10729605087, 0.916168349913, 0.678547664519,\n", - " 0.562381751596, 0.355468474885, -0.155607486619, -0.287198661013, -0.602973173813,\n", - " -0.873817307503, -1.144661441193, -1.415505574883, -1.686349708573, -1.957193842263, \n", - " -2.228037975953, -2.498882109643, -2.769726243333, -3.040570377023, -3.311414510713, \n", - " -3.582258644403, -3.853102778093, -4.123946911783, -4.394791045473, -4.665635179163, \n", - " -4.936479312853, -5.207323446543, -5.478167580233, -5.749011713923, -6.019855847613,\n", - " -6.290699981303, -6.561544114993, -6.832388248683, -7.103232382373, -7.374076516063,\n", - " -7.644920649753, -7.915764783443, -8.186608917133, -8.457453050823, -8.728297184513, \n", - " -8.999141318203, -9.269985451893, -9.540829585583, -9.811673719273, -10.082517852963,\n", - " -10.353361986653, -10.624206120343, -10.895050254033, -11.165894387723, -11.436738521413,\n", - " -11.707582655103, 
-11.978426788793, -12.249270922483, -12.520115056173\n", - " };\n", - " \n", - " // std::vector expandedMeasurements = generateExpandedData(measurements, expansion_factor);\n", - " \n", - " std::vector x0 = {measurements[0], 0, 0};\n", + " 1.04202710058,1.10726790452,1.2913511148,1.48485250951,1.72825901034,1.74216489744,\n", + " 2.11672039768,2.14529225112,2.16029641405,2.21269371128,2.57709350237,2.6682215744,\n", + " 2.51641839428,2.76034056782,2.88131780617,2.88373786518,2.9448468727,2.82866600131,\n", + " 3.0006601946,3.12920591669,2.858361783,2.83808170354,2.68975330958,2.66533185589,\n", + " 2.81613499531,2.81003612051,2.88321849354,2.69789264832,2.4342229249,2.23464791825,\n", + " 2.30278776224,2.02069770395,1.94393985809,1.82498398739,1.52526230354,1.86967808173,\n", + " 1.18073207847,1.10729605087,0.916168349913,0.678547664519,0.562381751596,0.355468474885,\n", + " -0.155607486619,-0.287198661013,-0.602973173813,-0.292648661013,-0.602973173813,\n", + " -0.924197686613,-1.2563221994129998,-1.5993467122129998,-1.9532712250129998,-2.3180957378129996,\n", + " -2.6938202506129993,-3.0804447634129994,-3.4779692762129994,-3.8863937890129994,-4.305718301812999,\n", + " -4.735942814612999,-5.177067327412999,-5.629091840212999,-6.0920163530129985,-6.565840865812999,\n", + " -7.0505653786129985,-7.546189891412999,-8.052714404212999,-8.570138917012999,-9.098463429812998,\n", + " -9.637687942612999,-10.187812455412999,-10.748836968212998,-11.320761481013,-11.903585993813,\n", + " -12.497310506613,-13.101935019413,-13.717459532213,-14.343884045013,-14.981208557813002,\n", + " -15.629433070613002,-16.288557583413002,-16.958582096213004,-17.639506609013004,\n", + " -18.331331121813005,-19.034055634613004,-19.747680147413003,-20.472204660213006,\n", + " -21.207629173013007,-21.95395368581301,-22.71117819861301,-23.47930271141301,\n", + " -24.25832722421301,-25.04825173701301,-25.849076249813013,-26.660800762613015,\n", + " -27.483425275413015,-28.316949788213016,-29.161374301013016,-30.01669881381302,\n", + " -30.88292332661302,-31.760047839413023,-32.64807235221303,-33.54699686501303,\n", + " -34.45682137781303,-35.37754589061303,-36.30917040341303,-37.251694916213026,\n", + " -38.20511942901303,-39.169443941813036,-40.14466845461304,-41.13079296741304,\n", + " -42.12781748021305,-43.13574199301305,-44.15456650581305,-45.18429101861305,\n", + " -46.22491553141305,-47.276440044213054,-48.33886455701305,-49.41218906981305,\n", + " -50.49641358261306,-51.591538095413064,-52.69756260821307,-53.81448712101307,\n", + " -54.94231163381308,-56.08103614661308,-57.23066065941308,-58.391185172213085,\n", + " -59.562609685013086,-60.74493419781309,-61.93815871061309,-63.14228322341309,\n", + " -64.35730773621309,-65.5832322490131,-66.82005676181309,-68.0677812746131,\n", + " -69.3264057874131,-70.5959303002131,-71.8763548130131,-73.1676793258131,\n", + " -74.46990383861309,-75.7830283514131,-77.1070528642131,-78.4419773770131,\n", + " -79.78780188981311,-81.14452640261311,-82.51215091541312,-83.89067542821311\n", + "\n", + " };\n", + " \n", + " std::vector x0 = {measurements[0], 0, -15};\n", + " std::vector> gain;\n", " kf.init(0, x0);\n", "\n", - " // Feed measurements into filter, output estimated states\n", + " \n", " std::vector y(m);\n", " if(verbose) {\n", " std::cout << \"t = \" << 0 << \", \" << \"x_hat[0]: \";\n", " for (auto& val : kf.state()){\n", " std::cout << val << \" \";\n", " }\n", - " g_est.push_back(kf.state()[2]);\n", - " \n", " std::cout << std::endl;\n", " }\n", " \n", " int i;\n", " 
for (i = 0; i < measurements.size(); i++) {\n", " y[0] = measurements[i];\n", - " kf.update(y);\n", + " gain.push_back(kf.update(y));\n", " if(verbose) {\n", " std::cout << \"t = \" << (i + 1) * dt << \", y[\" << i << \"] = \" << y[0] << \", x_hat[\" << i << \"] = \";\n", " for (auto& val : kf.state()) {\n", @@ -475,10 +899,10 @@ " std::cout<<\"Exec Success, Final kf states:\";\n", " for (auto& val : kf.state()) std::cout << val << \" \";\n", " std::cout << std::endl;\n", - " \n", + "\n", " std::vector> g_res;\n", " for (size_t i = 0; i < g_preds.size(); ++i) {\n", - " std::vector pair = {g_preds[i], g_est[i]};\n", + " std::vector pair = {g_preds[i], gain[i][0]};\n", " g_res.push_back(pair);\n", " }\n", " \n", @@ -488,167 +912,183 @@ }, { "cell_type": "code", - "execution_count": 18, - "id": "1e77f6cc-410a-4cd4-acbc-94677b77c968", - "metadata": {}, - "outputs": [], - "source": [ - "#include \n", - "auto start = std::chrono::high_resolution_clock::now();\n", - "auto stop = std::chrono::high_resolution_clock::now();\n", - "auto duration = std::chrono::duration_cast(stop - start);" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "7ea068c2-fc1f-4ad4-a2c9-aecd66cbbf71", - "metadata": {}, - "outputs": [], - "source": [ - "std::vector> pyrun_sim(int exp_factor, bool verbose) {\n", - " start = std::chrono::high_resolution_clock::now();\n", - " std::vector> g_res = run_kf(exp_factor, verbose);\n", - " stop = std::chrono::high_resolution_clock::now();\n", - " duration = std::chrono::duration_cast(stop - start);\n", - " return g_res;\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "6026ea4e-c42b-42e5-97a7-12af37e121a9", + "execution_count": 25, + "id": "7a910405-c4ff-41fa-aec5-69aa24598f9c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "t = 0, x_hat[0]: 1.04203 0 0 \n", - "t = 0.025, y[0] = 1.04203, x_hat[0] = 1.04203 0 0 \n", - "t = 0.05, y[1] = 1.10727, x_hat[1] = 1.08709 0.898405 0.00110609 \n", - "t = 0.075, y[2] = 1.29135, x_hat[2] = 1.22062 2.38572 0.00356059 \n", - "t = 0.1, y[3] = 1.48485, x_hat[3] = 1.3876 3.46905 0.00712492 \n", - "t = 0.125, y[4] = 1.72826, x_hat[4] = 1.58997 4.40683 0.013793 \n", - "t = 0.15, y[5] = 1.74216, x_hat[5] = 1.71702 4.52169 0.0154301 \n", - "t = 0.175, y[6] = 2.11672, x_hat[6] = 1.93313 5.12414 0.031192 \n", - "t = 0.2, y[7] = 2.14529, x_hat[7] = 2.08865 5.26575 0.0374157 \n", - "t = 0.225, y[8] = 2.1603, x_hat[8] = 2.20235 5.18421 0.0316618 \n", - "t = 0.25, y[9] = 2.21269, x_hat[9] = 2.29893 5.04726 0.0173069 \n", - "t = 0.275, y[10] = 2.57709, x_hat[10] = 2.46442 5.19829 0.039685 \n", - "t = 0.3, y[11] = 2.66822, x_hat[11] = 2.61234 5.26324 0.0527055 \n", - "t = 0.325, y[12] = 2.51642, x_hat[12] = 2.69148 5.08935 0.00552738 \n", - "t = 0.35, y[13] = 2.76034, x_hat[13] = 2.80588 5.04887 -0.00849014 \n", - "t = 0.375, y[14] = 2.88132, x_hat[14] = 2.92141 5.01625 -0.0224249 \n", - "t = 0.4, y[15] = 2.88374, x_hat[15] = 3.0137 4.91887 -0.0729322 \n", - "t = 0.425, y[16] = 2.94485, x_hat[16] = 3.09895 4.80966 -0.139272 \n", - "t = 0.45, y[17] = 2.82867, x_hat[17] = 3.14447 4.59753 -0.288577 \n", - "t = 0.475, y[18] = 3.00066, x_hat[18] = 3.21103 4.45686 -0.396929 \n", - "t = 0.5, y[19] = 3.12921, x_hat[19] = 3.287 4.34976 -0.484796 \n", - "t = 0.525, y[20] = 2.85836, x_hat[20] = 3.29866 4.07181 -0.747944 \n", - "t = 0.55, y[21] = 2.83808, x_hat[21] = 3.30007 3.77743 -1.04221 \n", - "t = 0.575, y[22] = 2.68975, x_hat[22] = 3.26982 3.40694 -1.43333 \n", - "t = 0.6, y[23] = 
2.66533, x_hat[23] = 3.23376 3.03344 -1.83638 \n", - "t = 0.625, y[24] = 2.81613, x_hat[24] = 3.2232 2.74466 -2.13797 \n", - "t = 0.65, y[25] = 2.81004, x_hat[25] = 3.20765 2.45227 -2.44388 \n", - "t = 0.675, y[26] = 2.88322, x_hat[26] = 3.20161 2.19814 -2.69673 \n", - "t = 0.7, y[27] = 2.69789, x_hat[27] = 3.15898 1.84844 -3.07251 \n", - "t = 0.725, y[28] = 2.43422, x_hat[28] = 3.07035 1.3784 -3.6016 \n", - "t = 0.75, y[29] = 2.23465, x_hat[29] = 2.95232 0.840664 -4.20755 \n", - "t = 0.775, y[30] = 2.30279, x_hat[30] = 2.85557 0.387859 -4.67894 \n", - "t = 0.8, y[31] = 2.0207, x_hat[31] = 2.7166 -0.169629 -5.27548 \n", - "t = 0.825, y[32] = 1.94394, x_hat[32] = 2.57681 -0.704099 -5.81837 \n", - "t = 0.85, y[33] = 1.82498, x_hat[33] = 2.42945 -1.23525 -6.3351 \n", - "t = 0.875, y[34] = 1.52526, x_hat[34] = 2.24401 -1.85275 -6.94502 \n", - "t = 0.9, y[35] = 1.86968, x_hat[35] = 2.13957 -2.19864 -7.17158 \n", - "t = 0.925, y[36] = 1.18073, x_hat[36] = 1.92439 -2.85128 -7.78709 \n", - "t = 0.95, y[37] = 1.1073, x_hat[37] = 1.72092 -3.43475 -8.28638 \n", - "t = 0.975, y[38] = 0.916168, x_hat[38] = 1.50771 -4.01434 -8.75833 \n", - "t = 1, y[39] = 0.678548, x_hat[39] = 1.2784 -4.60797 -9.22649 \n", - "t = 1.025, y[40] = 0.562382, x_hat[40] = 1.05708 -5.14471 -9.60338 \n", - "t = 1.05, y[41] = 0.355468, x_hat[41] = 0.827473 -5.67368 -9.95378 \n", - "t = 1.075, y[42] = -0.155607, x_hat[42] = 0.537759 -6.3418 -10.4545 \n", - "t = 1.1, y[43] = -0.287199, x_hat[43] = 0.262434 -6.93117 -10.8401 \n", - "t = 1.125, y[44] = -0.602973, x_hat[44] = -0.0317108 -7.53829 -11.2291 \n", - "t = 1.15, y[45] = -0.873817, x_hat[45] = -0.333878 -8.13198 -11.5854 \n", - "t = 1.175, y[46] = -1.14466, x_hat[46] = -0.64242 -8.70817 -11.9065 \n", - "t = 1.2, y[47] = -1.41551, x_hat[47] = -0.955917 -9.2638 -12.1908 \n", - "t = 1.225, y[48] = -1.68635, x_hat[48] = -1.27315 -9.79661 -12.4381 \n", - "t = 1.25, y[49] = -1.95719, x_hat[49] = -1.5931 -10.3051 -12.6488 \n", - "t = 1.275, y[50] = -2.22804, x_hat[50] = -1.91487 -10.7882 -12.8241 \n", - "t = 1.3, y[51] = -2.49888, x_hat[51] = -2.23773 -11.2455 -12.9653 \n", - "t = 1.325, y[52] = -2.76973, x_hat[52] = -2.56107 -11.6768 -13.0743 \n", - "t = 1.35, y[53] = -3.04057, x_hat[53] = -2.88438 -12.0825 -13.1531 \n", - "t = 1.375, y[54] = -3.31141, x_hat[54] = -3.20725 -12.4629 -13.2039 \n", - "t = 1.4, y[55] = -3.58226, x_hat[55] = -3.52932 -12.8188 -13.2289 \n", - "t = 1.425, y[56] = -3.8531, x_hat[56] = -3.85034 -13.1508 -13.2301 \n", - "t = 1.45, y[57] = -4.12395, x_hat[57] = -4.17008 -13.46 -13.2098 \n", - "t = 1.475, y[58] = -4.39479, x_hat[58] = -4.48837 -13.7472 -13.17 \n", - "t = 1.5, y[59] = -4.66564, x_hat[59] = -4.8051 -14.0134 -13.1126 \n", - "t = 1.525, y[60] = -4.93648, x_hat[60] = -5.12016 -14.2597 -13.0395 \n", - "t = 1.55, y[61] = -5.20732, x_hat[61] = -5.4335 -14.4872 -12.9525 \n", - "t = 1.575, y[62] = -5.47817, x_hat[62] = -5.74508 -14.6968 -12.853 \n", - "t = 1.6, y[63] = -5.74901, x_hat[63] = -6.05487 -14.8895 -12.7428 \n", - "t = 1.625, y[64] = -6.01986, x_hat[64] = -6.36288 -15.0664 -12.623 \n", - "t = 1.65, y[65] = -6.2907, x_hat[65] = -6.66912 -15.2283 -12.4951 \n", - "t = 1.675, y[66] = -6.56154, x_hat[66] = -6.97361 -15.3763 -12.3601 \n", - "t = 1.7, y[67] = -6.83239, x_hat[67] = -7.27637 -15.5111 -12.2191 \n", - "t = 1.725, y[68] = -7.10323, x_hat[68] = -7.57746 -15.6336 -12.0731 \n", - "t = 1.75, y[69] = -7.37408, x_hat[69] = -7.87691 -15.7446 -11.9229 \n", - "t = 1.775, y[70] = -7.64492, x_hat[70] = -8.17476 -15.8449 -11.7694 \n", - "t = 1.8, 
y[71] = -7.91576, x_hat[71] = -8.47108 -15.9351 -11.6132 \n", - "t = 1.825, y[72] = -8.18661, x_hat[72] = -8.7659 -16.0159 -11.4551 \n", - "t = 1.85, y[73] = -8.45745, x_hat[73] = -9.05929 -16.0881 -11.2954 \n", - "t = 1.875, y[74] = -8.7283, x_hat[74] = -9.3513 -16.152 -11.1349 \n", - "t = 1.9, y[75] = -8.99914, x_hat[75] = -9.64199 -16.2084 -10.9739 \n", - "t = 1.925, y[76] = -9.26999, x_hat[76] = -9.9314 -16.2578 -10.8128 \n", - "t = 1.95, y[77] = -9.54083, x_hat[77] = -10.2196 -16.3005 -10.652 \n", - "t = 1.975, y[78] = -9.81167, x_hat[78] = -10.5066 -16.3372 -10.4918 \n", - "t = 2, y[79] = -10.0825, x_hat[79] = -10.7925 -16.3682 -10.3325 \n", - "t = 2.025, y[80] = -10.3534, x_hat[80] = -11.0774 -16.3939 -10.1743 \n", - "t = 2.05, y[81] = -10.6242, x_hat[81] = -11.3612 -16.4148 -10.0175 \n", - "t = 2.075, y[82] = -10.8951, x_hat[82] = -11.6441 -16.4311 -9.86215 \n", - "t = 2.1, y[83] = -11.1659, x_hat[83] = -11.9261 -16.4432 -9.70853 \n", - "t = 2.125, y[84] = -11.4367, x_hat[84] = -12.2072 -16.4514 -9.55674 \n", - "t = 2.15, y[85] = -11.7076, x_hat[85] = -12.4874 -16.4559 -9.4069 \n", - "t = 2.175, y[86] = -11.9784, x_hat[86] = -12.7669 -16.4572 -9.25909 \n", - "t = 2.2, y[87] = -12.2493, x_hat[87] = -13.0456 -16.4553 -9.1134 \n", - "t = 2.225, y[88] = -12.5201, x_hat[88] = -13.3236 -16.4505 -8.96991 \n", + "t = 0, x_hat[0]: 1.04203 0 -15 \n", + "t = 0.0333333, y[0] = 1.04203, x_hat[0] = 1.04203 -0.5 -15 \n", + "t = 0.0666667, y[1] = 1.10727, x_hat[1] = 1.08556 -0.0966619 -14.9988 \n", + "t = 0.1, y[2] = 1.29135, x_hat[2] = 1.21317 0.720024 -14.9952 \n", + "t = 0.133333, y[3] = 1.48485, x_hat[3] = 1.36865 1.21707 -14.9881 \n", + "t = 0.166667, y[4] = 1.72826, x_hat[4] = 1.55548 1.60875 -14.9732 \n", + "t = 0.2, y[5] = 1.74216, x_hat[5] = 1.66278 1.38374 -14.9637 \n", + "t = 0.233333, y[6] = 2.11672, x_hat[6] = 1.85606 1.53382 -14.9229 \n", + "t = 0.266667, y[7] = 2.14529, x_hat[7] = 1.98512 1.34018 -14.8908 \n", + "t = 0.3, y[8] = 2.1603, x_hat[8] = 2.06901 0.981678 -14.8682 \n", + "t = 0.333333, y[9] = 2.21269, x_hat[9] = 2.13267 0.58593 -14.8441 \n", + "t = 0.366667, y[10] = 2.57709, x_hat[10] = 2.26319 0.425535 -14.7321 \n", + "t = 0.4, y[11] = 2.66822, x_hat[11] = 2.37389 0.210228 -14.6096 \n", + "t = 0.433333, y[12] = 2.51642, x_hat[12] = 2.41279 -0.189016 -14.56 \n", + "t = 0.466667, y[13] = 2.76034, x_hat[13] = 2.4865 -0.459522 -14.4115 \n", + "t = 0.5, y[14] = 2.88132, x_hat[14] = 2.56093 -0.701772 -14.2172 \n", + "t = 0.533333, y[15] = 2.88374, x_hat[15] = 2.61135 -0.980106 -14.0346 \n", + "t = 0.566667, y[16] = 2.94485, x_hat[16] = 2.65526 -1.24377 -13.8225 \n", + "t = 0.6, y[17] = 2.82867, x_hat[17] = 2.65812 -1.58494 -13.6876 \n", + "t = 0.633333, y[18] = 3.00066, x_hat[18] = 2.68613 -1.81973 -13.4217 \n", + "t = 0.666667, y[19] = 3.12921, x_hat[19] = 2.72799 -1.9816 -13.0631 \n", + "t = 0.7, y[20] = 2.85836, x_hat[20] = 2.70186 -2.30405 -12.9167 \n", + "t = 0.733333, y[21] = 2.83808, x_hat[21] = 2.66841 -2.61016 -12.7523 \n", + "t = 0.766667, y[22] = 2.68975, x_hat[22] = 2.60351 -2.971 -12.6666 \n", + "t = 0.8, y[23] = 2.66533, x_hat[23] = 2.53742 -3.29663 -12.5373 \n", + "t = 0.833333, y[24] = 2.81613, x_hat[24] = 2.50742 -3.47892 -12.2229 \n", + "t = 0.866667, y[25] = 2.81004, x_hat[25] = 2.47782 -3.63091 -11.8847 \n", + "t = 0.9, y[26] = 2.88322, x_hat[26] = 2.46572 -3.70491 -11.4628 \n", + "t = 0.933333, y[27] = 2.69789, x_hat[27] = 2.41597 -3.86954 -11.1819 \n", + "t = 0.966667, y[28] = 2.43422, x_hat[28] = 2.31755 -4.15264 -11.068 \n", + "t = 1, y[29] = 2.23465, 
x_hat[29] = 2.19065 -4.48803 -11.0261 \n", + "t = 1.03333, y[30] = 2.30279, x_hat[30] = 2.09529 -4.69914 -10.8344 \n", + "t = 1.06667, y[31] = 2.0207, x_hat[31] = 1.95562 -5.01191 -10.7763 \n", + "t = 1.1, y[32] = 1.94394, x_hat[32] = 1.82057 -5.28093 -10.6701 \n", + "t = 1.13333, y[33] = 1.82498, x_hat[33] = 1.68155 -5.53372 -10.5515 \n", + "t = 1.16667, y[34] = 1.52526, x_hat[34] = 1.50284 -5.86969 -10.5338 \n", + "t = 1.2, y[35] = 1.86968, x_hat[35] = 1.42124 -5.91291 -10.1937 \n", + "t = 1.23333, y[36] = 1.18073, x_hat[36] = 1.2154 -6.27594 -10.2188 \n", + "t = 1.26667, y[37] = 1.1073, x_hat[37] = 1.02642 -6.56369 -10.1629 \n", + "t = 1.3, y[38] = 0.916168, x_hat[38] = 0.829169 -6.84703 -10.1055 \n", + "t = 1.33333, y[39] = 0.678548, x_hat[39] = 0.616214 -7.14521 -10.0663 \n", + "t = 1.36667, y[40] = 0.562382, x_hat[40] = 0.41403 -7.39117 -9.97735 \n", + "t = 1.4, y[41] = 0.355468, x_hat[41] = 0.204015 -7.63475 -9.89085 \n", + "t = 1.43333, y[42] = -0.155607, x_hat[42] = -0.0706529 -8.01301 -9.93708 \n", + "t = 1.46667, y[43] = -0.287199, x_hat[43] = -0.328135 -8.32148 -9.91585 \n", + "t = 1.5, y[44] = -0.602973, x_hat[44] = -0.605038 -8.65089 -9.91483 \n", + "t = 1.53333, y[45] = -0.292649, x_hat[45] = -0.781097 -8.72417 -9.68453 \n", + "t = 1.56667, y[46] = -0.602973, x_hat[46] = -0.985005 -8.85119 -9.51267 \n", + "t = 1.6, y[47] = -0.924198, x_hat[47] = -1.21467 -9.02335 -9.38793 \n", + "t = 1.63333, y[48] = -1.25632, x_hat[48] = -1.46826 -9.2333 -9.30101 \n", + "t = 1.66667, y[49] = -1.59935, x_hat[49] = -1.74413 -9.4748 -9.24427 \n", + "t = 1.7, y[50] = -1.95327, x_hat[50] = -2.04085 -9.74255 -9.21144 \n", + "t = 1.73333, y[51] = -2.3181, x_hat[51] = -2.35716 -10.032 -9.19743 \n", + "t = 1.76667, y[52] = -2.69382, x_hat[52] = -2.69196 -10.3394 -9.19807 \n", + "t = 1.8, y[53] = -3.08044, x_hat[53] = -3.04427 -10.6615 -9.20997 \n", + "t = 1.83333, y[54] = -3.47797, x_hat[54] = -3.41323 -10.9956 -9.23039 \n", + "t = 1.86667, y[55] = -3.88639, x_hat[55] = -3.7981 -11.3393 -9.25711 \n", + "t = 1.9, y[56] = -4.30572, x_hat[56] = -4.19822 -11.6907 -9.28834 \n", + "t = 1.93333, y[57] = -4.73594, x_hat[57] = -4.61301 -12.0482 -9.32265 \n", + "t = 1.96667, y[58] = -5.17707, x_hat[58] = -5.04196 -12.4105 -9.3589 \n", + "t = 2, y[59] = -5.62909, x_hat[59] = -5.48464 -12.7763 -9.39618 \n", + "t = 2.03333, y[60] = -6.09202, x_hat[60] = -5.94065 -13.1448 -9.43379 \n", + "t = 2.06667, y[61] = -6.56584, x_hat[61] = -6.40966 -13.5151 -9.47117 \n", + "t = 2.1, y[62] = -7.05057, x_hat[62] = -6.89136 -13.8865 -9.5079 \n", + "t = 2.13333, y[63] = -7.54619, x_hat[63] = -7.38549 -14.2587 -9.54365 \n", + "t = 2.16667, y[64] = -8.05271, x_hat[64] = -7.89184 -14.6309 -9.57819 \n", + "t = 2.2, y[65] = -8.57014, x_hat[65] = -8.41019 -15.003 -9.61135 \n", + "t = 2.23333, y[66] = -9.09846, x_hat[66] = -8.94038 -15.3747 -9.64301 \n", + "t = 2.26667, y[67] = -9.63769, x_hat[67] = -9.48225 -15.7456 -9.67311 \n", + "t = 2.3, y[68] = -10.1878, x_hat[68] = -10.0357 -16.1156 -9.70161 \n", + "t = 2.33333, y[69] = -10.7488, x_hat[69] = -10.6005 -16.4845 -9.7285 \n", + "t = 2.36667, y[70] = -11.3208, x_hat[70] = -11.1767 -16.8523 -9.75379 \n", + "t = 2.4, y[71] = -11.9036, x_hat[71] = -11.7642 -17.2188 -9.77751 \n", + "t = 2.43333, y[72] = -12.4973, x_hat[72] = -12.3628 -17.5841 -9.7997 \n", + "t = 2.46667, y[73] = -13.1019, x_hat[73] = -12.9725 -17.9479 -9.82041 \n", + "t = 2.5, y[74] = -13.7175, x_hat[74] = -13.5932 -18.3104 -9.83969 \n", + "t = 2.53333, y[75] = -14.3439, x_hat[75] = -14.225 -18.6716 -9.8576 \n", + "t 
= 2.56667, y[76] = -14.9812, x_hat[76] = -14.8677 -19.0313 -9.87422 \n", + "t = 2.6, y[77] = -15.6294, x_hat[77] = -15.5214 -19.3897 -9.88959 \n", + "t = 2.63333, y[78] = -16.2886, x_hat[78] = -16.1859 -19.7467 -9.90379 \n", + "t = 2.66667, y[79] = -16.9586, x_hat[79] = -16.8613 -20.1025 -9.91688 \n", + "t = 2.7, y[80] = -17.6395, x_hat[80] = -17.5475 -20.4569 -9.92892 \n", + "t = 2.73333, y[81] = -18.3313, x_hat[81] = -18.2446 -20.8101 -9.93998 \n", + "t = 2.76667, y[82] = -19.0341, x_hat[82] = -18.9524 -21.162 -9.9501 \n", + "t = 2.8, y[83] = -19.7477, x_hat[83] = -19.6711 -21.5128 -9.95936 \n", + "t = 2.83333, y[84] = -20.4722, x_hat[84] = -20.4006 -21.8624 -9.96781 \n", + "t = 2.86667, y[85] = -21.2076, x_hat[85] = -21.1408 -22.2109 -9.97549 \n", + "t = 2.9, y[86] = -21.954, x_hat[86] = -21.8918 -22.5584 -9.98246 \n", + "t = 2.93333, y[87] = -22.7112, x_hat[87] = -22.6535 -22.9048 -9.98876 \n", + "t = 2.96667, y[88] = -23.4793, x_hat[88] = -23.4261 -23.2503 -9.99445 \n", + "t = 3, y[89] = -24.2583, x_hat[89] = -24.2094 -23.5948 -9.99955 \n", + "t = 3.03333, y[90] = -25.0483, x_hat[90] = -25.0034 -23.9384 -10.0041 \n", + "t = 3.06667, y[91] = -25.8491, x_hat[91] = -25.8082 -24.2812 -10.0082 \n", + "t = 3.1, y[92] = -26.6608, x_hat[92] = -26.6238 -24.6231 -10.0118 \n", + "t = 3.13333, y[93] = -27.4834, x_hat[93] = -27.4501 -24.9642 -10.015 \n", + "t = 3.16667, y[94] = -28.3169, x_hat[94] = -28.2872 -25.3046 -10.0177 \n", + "t = 3.2, y[95] = -29.1614, x_hat[95] = -29.135 -25.6443 -10.0201 \n", + "t = 3.23333, y[96] = -30.0167, x_hat[96] = -29.9937 -25.9833 -10.0222 \n", + "t = 3.26667, y[97] = -30.8829, x_hat[97] = -30.863 -26.3216 -10.0239 \n", + "t = 3.3, y[98] = -31.76, x_hat[98] = -31.7432 -26.6593 -10.0254 \n", + "t = 3.33333, y[99] = -32.6481, x_hat[99] = -32.6341 -26.9964 -10.0265 \n", + "t = 3.36667, y[100] = -33.547, x_hat[100] = -33.5358 -27.333 -10.0274 \n", + "t = 3.4, y[101] = -34.4568, x_hat[101] = -34.4483 -27.669 -10.0281 \n", + "t = 3.43333, y[102] = -35.3775, x_hat[102] = -35.3716 -28.0045 -10.0286 \n", + "t = 3.46667, y[103] = -36.3092, x_hat[103] = -36.3056 -28.3395 -10.0289 \n", + "t = 3.5, y[104] = -37.2517, x_hat[104] = -37.2505 -28.674 -10.029 \n", + "t = 3.53333, y[105] = -38.2051, x_hat[105] = -38.2061 -29.0081 -10.0289 \n", + "t = 3.56667, y[106] = -39.1694, x_hat[106] = -39.1726 -29.3418 -10.0287 \n", + "t = 3.6, y[107] = -40.1447, x_hat[107] = -40.1498 -29.6751 -10.0283 \n", + "t = 3.63333, y[108] = -41.1308, x_hat[108] = -41.1378 -30.008 -10.0278 \n", + "t = 3.66667, y[109] = -42.1278, x_hat[109] = -42.1367 -30.3405 -10.0272 \n", + "t = 3.7, y[110] = -43.1357, x_hat[110] = -43.1463 -30.6727 -10.0265 \n", + "t = 3.73333, y[111] = -44.1546, x_hat[111] = -44.1668 -31.0046 -10.0257 \n", + "t = 3.76667, y[112] = -45.1843, x_hat[112] = -45.1981 -31.3362 -10.0247 \n", + "t = 3.8, y[113] = -46.2249, x_hat[113] = -46.2402 -31.6674 -10.0238 \n", + "t = 3.83333, y[114] = -47.2764, x_hat[114] = -47.2932 -31.9984 -10.0227 \n", + "t = 3.86667, y[115] = -48.3389, x_hat[115] = -48.3569 -32.3292 -10.0216 \n", + "t = 3.9, y[116] = -49.4122, x_hat[116] = -49.4315 -32.6597 -10.0204 \n", + "t = 3.93333, y[117] = -50.4964, x_hat[117] = -50.517 -32.9899 -10.0191 \n", + "t = 3.96667, y[118] = -51.5915, x_hat[118] = -51.6132 -33.3199 -10.0179 \n", + "t = 4, y[119] = -52.6976, x_hat[119] = -52.7204 -33.6497 -10.0165 \n", + "t = 4.03333, y[120] = -53.8145, x_hat[120] = -53.8383 -33.9793 -10.0152 \n", + "t = 4.06667, y[121] = -54.9423, x_hat[121] = -54.9671 -34.3087 -10.0138 \n", + "t = 
4.1, y[122] = -56.081, x_hat[122] = -56.1067 -34.638 -10.0123 \n", + "t = 4.13333, y[123] = -57.2307, x_hat[123] = -57.2572 -34.967 -10.0109 \n", + "t = 4.16667, y[124] = -58.3912, x_hat[124] = -58.4185 -35.2959 -10.0094 \n", + "t = 4.2, y[125] = -59.5626, x_hat[125] = -59.5907 -35.6246 -10.0079 \n", + "t = 4.23333, y[126] = -60.7449, x_hat[126] = -60.7738 -35.9532 -10.0064 \n", + "t = 4.26667, y[127] = -61.9382, x_hat[127] = -61.9677 -36.2817 -10.0048 \n", + "t = 4.3, y[128] = -63.1423, x_hat[128] = -63.1724 -36.61 -10.0033 \n", + "t = 4.33333, y[129] = -64.3573, x_hat[129] = -64.388 -36.9381 -10.0018 \n", + "t = 4.36667, y[130] = -65.5832, x_hat[130] = -65.6145 -37.2662 -10.0002 \n", + "t = 4.4, y[131] = -66.8201, x_hat[131] = -66.8518 -37.5942 -9.99866 \n", + "t = 4.43333, y[132] = -68.0678, x_hat[132] = -68.1 -37.922 -9.9971 \n", + "t = 4.46667, y[133] = -69.3264, x_hat[133] = -69.3591 -38.2498 -9.99554 \n", + "t = 4.5, y[134] = -70.5959, x_hat[134] = -70.629 -38.5774 -9.99399 \n", + "t = 4.53333, y[135] = -71.8764, x_hat[135] = -71.9099 -38.905 -9.99244 \n", + "t = 4.56667, y[136] = -73.1677, x_hat[136] = -73.2015 -39.2324 -9.99089 \n", + "t = 4.6, y[137] = -74.4699, x_hat[137] = -74.5041 -39.5598 -9.98934 \n", + "t = 4.63333, y[138] = -75.783, x_hat[138] = -75.8175 -39.8871 -9.98781 \n", + "t = 4.66667, y[139] = -77.1071, x_hat[139] = -77.1418 -40.2144 -9.98628 \n", + "t = 4.7, y[140] = -78.442, x_hat[140] = -78.477 -40.5416 -9.98476 \n", + "t = 4.73333, y[141] = -79.7878, x_hat[141] = -79.823 -40.8687 -9.98324 \n", + "t = 4.76667, y[142] = -81.1445, x_hat[142] = -81.18 -41.1957 -9.98174 \n", + "t = 4.8, y[143] = -82.5122, x_hat[143] = -82.5478 -41.5227 -9.98025 \n", + "t = 4.83333, y[144] = -83.8907, x_hat[144] = -83.9265 -41.8497 -9.97877 \n", "\n", - "Exec Success, Final kf states:-13.3236 -16.4505 -8.96991 \n" + "Exec Success, Final kf states:-83.9265 -41.8497 -9.97877 \n" ] } ], "source": [ - "std::vector> g_res = pyrun_sim(1, true);" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "5e8f3f24-b003-4416-bb16-eabc480e437e", - "metadata": {}, - "outputs": [], - "source": [ - "std::vector py_g_pred;\n", - "std::vector py_g_est;" + "std::vector> g_res = run_kf(true);" ] }, { - "cell_type": "code", - "execution_count": 22, - "id": "107f53c0-0f2d-47b9-a218-67c29e16b87b", + "cell_type": "markdown", + "id": "835df3bf-9017-4a24-ac29-8e9d821bb881", "metadata": {}, - "outputs": [], "source": [ - "int k = g_res.size();" + "### Defining a C++ function to resolve the 2D vector result *{Kalman Filter Estimates, Kalman Gain}*\n", + "\n", + "- The function returns the KF estimates if the provided axis is 0 and Kalman Gains if axis = 1" ] }, { "cell_type": "code", - "execution_count": 23, - "id": "63f5ba73-393b-4a0a-9c83-6829b56fc526", + "execution_count": 26, + "id": "2a47ecf6-f4e9-44b6-b72c-a935ed02fdc1", "metadata": {}, "outputs": [], "source": [ @@ -659,129 +1099,154 @@ " }\n", " return ret;\n", "}\n", - " " + " " ] }, { - "cell_type": "code", - "execution_count": 24, - "id": "a6bc89d0-026f-4f52-b8c9-9a8fca6948f2", + "cell_type": "markdown", + "id": "1566d4bf-3952-4616-8b45-b0231c5be9f5", "metadata": {}, - "outputs": [], "source": [ - "py_g_pred = ret_1d_vector(g_res, 0);\n", - "py_g_est = ret_1d_vector(g_res, 1);" + "#### We obtain the final results as two std::vectors that can be accessed in Python:" ] }, { "cell_type": "code", - "execution_count": 25, - "id": "1b22451b-0bd7-48d6-8590-ff59b56a8031", + "execution_count": 27, + "id": "4e3c7506-8fb8-42f0-b5af-e8950d088a3d", 
"metadata": {}, "outputs": [], "source": [ - "void printMatrix(const std::vector& vec) {\n", - " for (size_t i = 0; i < vec.size(); i++) {\n", - " std::cout << vec[i] << \" \";\n", - " }\n", - " std::cout << std::endl;\n", - " }\n" + "std::vector py_g_pred = ret_1d_vector(g_res, 0);\n", + "std::vector py_kf_gains = ret_1d_vector(g_res, 1);" ] }, { - "cell_type": "code", - "execution_count": 26, - "id": "47fd1710-59a8-4d80-96de-04ef86e233fd", + "cell_type": "markdown", + "id": "2f7b1159-8a34-41c2-99d0-56a7e5714cab", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 0.00110609 0.00356059 0.00712492 0.013793 0.0154301 0.031192 0.0374157 0.0316618 0.0173069 0.039685 0.0527055 0.00552738 -0.00849014 -0.0224249 -0.0729322 -0.139272 -0.288577 -0.396929 -0.484796 -0.747944 -1.04221 -1.43333 -1.83638 -2.13797 -2.44388 -2.69673 -3.07251 -3.6016 -4.20755 -4.67894 -5.27548 -5.81837 -6.3351 -6.94502 -7.17158 -7.78709 -8.28638 -8.75833 -9.22649 -9.60338 -9.95378 -10.4545 -10.8401 -11.2291 -11.5854 -11.9065 -12.1908 -12.4381 -12.6488 -12.8241 -12.9653 -13.0743 -13.1531 -13.2039 -13.2289 -13.2301 -13.2098 -13.17 -13.1126 -13.0395 -12.9525 -12.853 -12.7428 -12.623 -12.4951 -12.3601 -12.2191 -12.0731 -11.9229 -11.7694 -11.6132 -11.4551 -11.2954 -11.1349 -10.9739 -10.8128 -10.652 -10.4918 -10.3325 -10.1743 -10.0175 -9.86215 -9.70853 -9.55674 -9.4069 -9.25909 -9.1134 -8.96991 \n" - ] - } - ], "source": [ - "printMatrix(py_g_pred);" + "### Lets plot the trend of Kalman Gain across the time steps to verify its working\n", + "\n", + "If the algorithm converged properly, we should see an exponential decrease in the gain with time" ] }, { "cell_type": "code", - "execution_count": 27, - "id": "d0152901-3f6d-49e5-9138-126d1b072912", + "execution_count": 28, + "id": "129a199c-70b0-454b-a70d-68373c4786f2", "metadata": {}, "outputs": [], "source": [ - "// %%python\n", + "%%python\n", "\n", - "// expand_factor = [1, 2, 3, 4, 5]\n", - "// benchmarks = []\n", + "import matplotlib.pyplot as plt\n", "\n", - "// for i in expand_factor:\n", - "// time_kf = cppyy.gbl.pyrun_sim(int(i), 0)\n", - "// benchmarks.append(time_kf)\n", - "// " + "kalman_gains = list(cppyy.gbl.py_kf_gains)\n", + "x = range(len(kalman_gains))" ] }, { "cell_type": "code", - "execution_count": 28, - "id": "2c0b5c86-7875-45d9-8113-00febd1be3d5", + "execution_count": 29, + "id": "f7bcd055-7470-42c9-aad8-238ff3d3566e", "metadata": {}, "outputs": [], "source": [ "%%python\n", "\n", - "import matplotlib.pyplot as plt" + "plt.plot(x, kalman_gains, color='blue', marker='v')\n", + " \n", + "plt.xlabel('Time Steps')\n", + "plt.ylabel('Kalman Gain')\n", + "plt.title('Kalman Gain Plot')\n", + "plt.savefig(\"kalman_gain_plot.jpg\")\n", + " \n", + "plt.yscale('symlog')\n", + " \n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "842b0e92-4722-4551-81a6-e3a00229da0f", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "8ae33816-740c-455b-82d2-4cacf29206c1", + "metadata": {}, + "source": [ + "### Now we can plot the values of Gravitational Acceleration *(g)* obtained as Kalman Estimates :" ] }, { "cell_type": "code", - "execution_count": 29, - "id": "e0d86e62-207f-4e14-80c1-2e04c8984fd6", + "execution_count": 30, + "id": "8801e6da-b52b-46b1-b895-e138bbaa39c0", "metadata": {}, "outputs": [], "source": [ "%%python\n", "\n", + "import matplotlib.pyplot as plt\n", + "\n", "true_val = 9.81\n", "g_pred = list(cppyy.gbl.py_g_pred)\n", "g_pred = list(-x for x in g_pred)\n", - "x 
 {
  "cell_type": "code",
  "execution_count": 31,
  "id": "09889e69-195e-44c0-87ee-a82616492101",
  "metadata": {},
  "outputs": [],
  "source": [
   "%%python\n",
   "\n",
-   "\n",
-   "# Plot the constant green line\n",
+   "plt.figure()\n",
   "plt.axhline(y=true_val, color='green', linestyle='-')\n",
-   "\n",
-   "# Plot g_pred in orange\n",
   "plt.plot(x, g_pred, color='orange', marker='o', label='KF Estimates')\n",
   "plt.annotate(f'{true_val}', xy=(-0.5, true_val), color='green',\n",
   "             verticalalignment='center', horizontalalignment = 'left')\n",
-   "# Add labels and title\n",
+   "    \n",
   "plt.xlabel('Index')\n",
   "plt.ylabel('Acceleration (m/s²)')\n",
   "plt.title('True Value vs. g_pred')\n",
   "plt.legend()\n",
-   "plt.savefig(\"1D_kf_plot.jpg\")\n",
+   "plt.savefig(\"1D_KF_plot.jpg\")\n",
   "    \n",
   "plt.yscale('symlog')\n",
-   "# Show the plot\n",
+   "    \n",
   "plt.show()\n"
  ]
 },
 {
  "cell_type": "markdown",
-  "id": "7f478ffc-eafa-4c98-9fcc-cd3d18833987",
+  "id": "96887ee7-3a13-4440-8f27-8e93b5bc96db",
  "metadata": {},
  "source": [
-   ""
+   "### Result: The Kalman Filter begins to converge on the value of 9.81"
+  ]
+ },
+ {
+  "cell_type": "markdown",
+  "id": "6596ee2f-a810-4e5c-a4c7-5634d71ec608",
+  "metadata": {},
+  "source": [
+   ""
  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
-  "id": "145a5f37-15ae-4336-b126-fba8b6760de2",
+  "id": "9788e8c7-cd17-412c-9158-cbfa601e935d",
  "metadata": {},
  "outputs": [],
  "source": []
diff --git a/notebooks/kalman_CUDA_demo/run_kf_CUDA.ipynb b/notebooks/kalman_CUDA_demo/run_kf_CUDA.ipynb
deleted file mode 100644
index 39f3a2e9..00000000
--- a/notebooks/kalman_CUDA_demo/run_kf_CUDA.ipynb
+++ /dev/null
@@ -1,928 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "afdccef0-6aaa-43d6-ac29-7ea950356e19",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#include \"kalman.hpp\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "6feb6a99-9580-4bf7-aa48-0a7d175494a4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#include \n",
-    "#include \n",
-    "#include \n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "ebcc8af4-32b2-418b-bfbe-9ffb4ff9b0ff",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "__global__ void matAddKernel(double* a, double* b, double* c, int rows, int cols) {\n",
-    "    int col = blockIdx.x * blockDim.x + threadIdx.x;\n",
-    "    int row = blockIdx.y * blockDim.y + threadIdx.y;\n",
-    "\n",
-    "    if (col < cols && row < rows) {\n",
-    "        int idx = row * cols + col;\n",
-    "        c[idx] = a[idx] + b[idx];\n",
-    "    }\n",
-    "}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "3f7f95d9-2f3d-4600-8896-0ed4dcca1ed6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "__global__ void matSubKernel(double* a, double* b, double* c, int rows, int cols) {\n",
-    "    int col = blockIdx.x * blockDim.x + threadIdx.x;\n",
-    "    int row = blockIdx.y * blockDim.y + threadIdx.y;\n",
-    "\n",
-    "    if (col < cols && row < rows) {\n",
-    "        int idx = row * cols + col;\n",
-    "        c[idx] = a[idx] - b[idx];\n",
-    "    }\n",
-    "}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "00183ab5-0380-4773-a34c-053fef7782f3",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "__global__ void matTransposeKernel(double* a, double* c, int rows, int cols) {\n",
-    "    int col = blockIdx.x * blockDim.x + threadIdx.x;\n",
-    "    int row = blockIdx.y * blockDim.y + threadIdx.y;\n",
-    "\n",
-    "    if (col < cols && row < rows) {\n",
-    "        int idx_in = row * cols + 
col;\n", - " int idx_out = col * rows + row;\n", - " c[idx_out] = a[idx_in];\n", - " }\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "d0eff8fe-34a5-4370-b6ff-118cd4d7cb22", - "metadata": {}, - "outputs": [], - "source": [ - "__global__ void matvecmulKernel(double* d_mat, double* d_vec, double* d_result, int rows, int cols) {\n", - " int tid = blockIdx.x * blockDim.x + threadIdx.x;\n", - "\n", - " if (tid < rows) {\n", - " double sum = 0.0;\n", - " for (int j = 0; j < cols; j++) {\n", - " sum += d_mat[tid * cols + j] * d_vec[j];\n", - " }\n", - " d_result[tid] = sum;\n", - " }\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "21718df0-5f56-4122-bc1a-a368b306fdaf", - "metadata": {}, - "outputs": [], - "source": [ - "__global__ void matmulKernel(double* d_a, double* d_b, double* d_result, int rowsA, int colsA, int colsB) {\n", - " int row = blockIdx.y * blockDim.y + threadIdx.y;\n", - " int col = blockIdx.x * blockDim.x + threadIdx.x;\n", - "\n", - " if (row < rowsA && col < colsB) {\n", - " double value = 0.0;\n", - " for (int k = 0; k < colsA; k++) {\n", - " value += d_a[row * colsA + k] * d_b[k * colsB + col];\n", - " }\n", - " d_result[row * colsB + col] = value;\n", - " }\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "18f843c9-9aee-4055-aae7-473c8f121037", - "metadata": {}, - "outputs": [], - "source": [ - "__global__ void vecsubKernel(const double* a, const double* b, double* result, int len) {\n", - " int idx = blockIdx.x * blockDim.x + threadIdx.x;\n", - " if (idx < len) {\n", - " result[idx] = a[idx] - b[idx];\n", - " }\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "6ff06da4-1430-4720-affb-b9c8b1dc7a26", - "metadata": {}, - "outputs": [], - "source": [ - "std::vector> mataddCUDA(const std::vector>& a, const std::vector>& b) {\n", - " int rows = a.size();\n", - " int cols = a[0].size();\n", - " \n", - " double* h_a = new double[rows*cols];\n", - " double* h_b = new double[rows*cols];\n", - " double* h_c = new double[rows*cols];\n", - " \n", - " for (int i = 0; i < rows; i++) {\n", - " for (int j = 0; j < cols; j++) {\n", - " h_a[i*cols + j] = a[i][j];\n", - " h_b[i*cols + j] = b[i][j];\n", - " }\n", - " }\n", - " \n", - " double* d_a, * d_b, * d_c;\n", - " cudaMalloc((void**)&d_a, rows*cols*sizeof(double));\n", - " cudaMalloc((void**)&d_b, rows*cols*sizeof(double));\n", - " cudaMalloc((void**)&d_c, rows*cols*sizeof(double));\n", - "\n", - " cudaMemcpy(d_a, h_a, rows*cols*sizeof(double), cudaMemcpyHostToDevice);\n", - " cudaMemcpy(d_b, h_b, rows*cols*sizeof(double), cudaMemcpyHostToDevice);\n", - "\n", - " dim3 dimBlock(16, 16);\n", - " dim3 dimGrid((cols + dimBlock.x - 1) / dimBlock.x, (rows + dimBlock.y - 1) / dimBlock.y);\n", - " matAddKernel<<>>(d_a, d_b, d_c, rows, cols);\n", - "\n", - " cudaMemcpy(h_c, d_c, rows*cols*sizeof(double), cudaMemcpyDeviceToHost);\n", - "\n", - " cudaFree(d_a);\n", - " cudaFree(d_b);\n", - " cudaFree(d_c);\n", - "\n", - " std::vector> result(rows, std::vector(cols));\n", - " for (int i = 0; i < rows; i++) {\n", - " for (int j = 0; j < cols; j++) {\n", - " result[i][j] = h_c[i*cols + j];\n", - " }\n", - " }\n", - "\n", - " delete[] h_a;\n", - " delete[] h_b;\n", - " delete[] h_c;\n", - "\n", - " return result;\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "b70f7f32-b7c2-4a1c-96a0-aea42b15f2aa", - "metadata": {}, - "outputs": [], - "source": [ - "std::vector> matsubCUDA(const std::vector>& a, const 
std::vector>& b) {\n", - " int rows = a.size();\n", - " int cols = a[0].size();\n", - " \n", - " double* h_a = new double[rows*cols];\n", - " double* h_b = new double[rows*cols];\n", - " double* h_c = new double[rows*cols];\n", - " \n", - " for (int i = 0; i < rows; i++) {\n", - " for (int j = 0; j < cols; j++) {\n", - " h_a[i*cols + j] = a[i][j];\n", - " h_b[i*cols + j] = b[i][j];\n", - " }\n", - " }\n", - "\n", - " double* d_a, * d_b, * d_c;\n", - " cudaMalloc((void**)&d_a, rows*cols*sizeof(double));\n", - " cudaMalloc((void**)&d_b, rows*cols*sizeof(double));\n", - " cudaMalloc((void**)&d_c, rows*cols*sizeof(double));\n", - "\n", - " cudaMemcpy(d_a, h_a, rows*cols*sizeof(double), cudaMemcpyHostToDevice);\n", - " cudaMemcpy(d_b, h_b, rows*cols*sizeof(double), cudaMemcpyHostToDevice);\n", - "\n", - " dim3 dimBlock(16, 16); \n", - " dim3 dimGrid((cols + dimBlock.x - 1) / dimBlock.x, (rows + dimBlock.y - 1) / dimBlock.y);\n", - " matSubKernel<<>>(d_a, d_b, d_c, rows, cols);\n", - "\n", - " cudaMemcpy(h_c, d_c, rows*cols*sizeof(double), cudaMemcpyDeviceToHost);\n", - "\n", - " cudaFree(d_a);\n", - " cudaFree(d_b);\n", - " cudaFree(d_c);\n", - "\n", - " std::vector> result(rows, std::vector(cols));\n", - " for (int i = 0; i < rows; i++) {\n", - " for (int j = 0; j < cols; j++) {\n", - " result[i][j] = h_c[i*cols + j];\n", - " }\n", - " }\n", - "\n", - " delete[] h_a;\n", - " delete[] h_b;\n", - " delete[] h_c;\n", - "\n", - " return result;\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "86b4a452-f04c-45a4-b69b-d87bcb2e5954", - "metadata": {}, - "outputs": [], - "source": [ - "std::vector> mattransposeCUDA(const std::vector>& a) {\n", - " int rows = a.size();\n", - " int cols = a[0].size();\n", - " \n", - " double* h_a = new double[rows*cols];\n", - " double* h_c = new double[cols*rows];\n", - " \n", - " for (int i = 0; i < rows; i++) {\n", - " for (int j = 0; j < cols; j++) {\n", - " h_a[i*cols + j] = a[i][j];\n", - " }\n", - " }\n", - "\n", - " double* d_a, * d_c;\n", - " cudaMalloc((void**)&d_a, rows*cols*sizeof(double));\n", - " cudaMalloc((void**)&d_c, cols*rows*sizeof(double));\n", - "\n", - " cudaMemcpy(d_a, h_a, rows*cols*sizeof(double), cudaMemcpyHostToDevice);\n", - "\n", - " dim3 dimBlock(16, 16);\n", - " dim3 dimGrid((cols + dimBlock.x - 1) / dimBlock.x, (rows + dimBlock.y - 1) / dimBlock.y);\n", - " matTransposeKernel<<>>(d_a, d_c, rows, cols);\n", - "\n", - " cudaMemcpy(h_c, d_c, cols*rows*sizeof(double), cudaMemcpyDeviceToHost);\n", - "\n", - " cudaFree(d_a);\n", - " cudaFree(d_c);\n", - "\n", - " std::vector> result(cols, std::vector(rows));\n", - " for (int i = 0; i < cols; i++) {\n", - " for (int j = 0; j < rows; j++) {\n", - " result[i][j] = h_c[i*rows + j];\n", - " }\n", - " }\n", - "\n", - " delete[] h_a;\n", - " delete[] h_c;\n", - "\n", - " return result;\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "bf413652-f71a-4b91-9e6e-5520812dc3cb", - "metadata": {}, - "outputs": [], - "source": [ - "// helper function: matrix inversion\n", - "std::vector> matinverse(const std::vector>& a) {\n", - " size_t n = a.size();\n", - " \n", - " if (n != a[0].size()) {\n", - " std::cout<<\" Shape of a : \"<> result(2, std::vector(2));\n", - " result[0][0] = a[1][1] / determinant;\n", - " result[0][1] = -a[0][1] / determinant;\n", - " result[1][0] = -a[1][0] / determinant;\n", - " result[1][1] = a[0][0] / determinant;\n", - "\n", - " return result;\n", - " }\n", - "\n", - " if (n == 3) {\n", - " double determinant = 
a[0][0]*(a[1][1]*a[2][2]-a[2][1]*a[1][2]) \n", - " - a[0][1]*(a[1][0]*a[2][2]-a[1][2]*a[2][0]) \n", - " + a[0][2]*(a[1][0]*a[2][1]-a[1][1]*a[2][0]);\n", - " \n", - " if (determinant == 0) {\n", - " throw std::runtime_error(\"singular matrix\");\n", - " }\n", - "\n", - " std::vector> result(3, std::vector(3));\n", - "\n", - " result[0][0] = (a[1][1] * a[2][2] - a[2][1] * a[1][2]) / determinant;\n", - " result[0][1] = (a[0][2] * a[2][1] - a[0][1] * a[2][2]) / determinant;\n", - " result[0][2] = (a[0][1] * a[1][2] - a[0][2] * a[1][1]) / determinant;\n", - " result[1][0] = (a[1][2] * a[2][0] - a[1][0] * a[2][2]) / determinant;\n", - " result[1][1] = (a[0][0] * a[2][2] - a[0][2] * a[2][0]) / determinant;\n", - " result[1][2] = (a[1][0] * a[0][2] - a[0][0] * a[1][2]) / determinant;\n", - " result[2][0] = (a[1][0] * a[2][1] - a[2][0] * a[1][1]) / determinant;\n", - " result[2][1] = (a[2][0] * a[0][1] - a[0][0] * a[2][1]) / determinant;\n", - " result[2][2] = (a[0][0] * a[1][1] - a[1][0] * a[0][1]) / determinant;\n", - "\n", - " return result;\n", - " }\n", - "\n", - " throw std::runtime_error(\"Only 2x2 and 3x3 matrices supported for inversion\");\n", - "}\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "0d925dce-377c-4d23-96d6-9c4fead297cb", - "metadata": {}, - "outputs": [], - "source": [ - "std::vector matvecmulCUDA(const std::vector>& a, const std::vector& b) {\n", - " int rows = a.size();\n", - " int cols = a[0].size();\n", - "\n", - " if (cols != b.size()) {\n", - " throw std::runtime_error(\"Matrix dims do not match for multiplication.\");\n", - " }\n", - "\n", - " std::vector h_mat(rows * cols);\n", - " for (int i = 0; i < rows; i++) {\n", - " for (int j = 0; j < cols; j++) {\n", - " h_mat[i * cols + j] = a[i][j];\n", - " }\n", - " }\n", - "\n", - " double *d_mat, *d_vec, *d_result;\n", - " cudaMalloc((void**)&d_mat, rows * cols * sizeof(double));\n", - " cudaMalloc((void**)&d_vec, cols * sizeof(double));\n", - " cudaMalloc((void**)&d_result, rows * sizeof(double));\n", - "\n", - " cudaMemcpy(d_mat, h_mat.data(), rows * cols * sizeof(double), cudaMemcpyHostToDevice);\n", - " cudaMemcpy(d_vec, b.data(), cols * sizeof(double), cudaMemcpyHostToDevice);\n", - "\n", - " int blockSize = 256;\n", - " int gridSize = (rows + blockSize - 1) / blockSize;\n", - "\n", - " matvecmulKernel<<>>(d_mat, d_vec, d_result, rows, cols);\n", - "\n", - " std::vector result(rows);\n", - " cudaMemcpy(result.data(), d_result, rows * sizeof(double), cudaMemcpyDeviceToHost);\n", - "\n", - " cudaFree(d_mat);\n", - " cudaFree(d_vec);\n", - " cudaFree(d_result);\n", - "\n", - " return result;\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "69714460-9a3e-4966-9de7-5e4111fc40af", - "metadata": {}, - "outputs": [], - "source": [ - "std::vector> matmulCUDA(const std::vector>& a, const std::vector>& b) {\n", - " int rowsA = a.size();\n", - " int colsA = a[0].size();\n", - " int rowsB = b.size();\n", - " int colsB = b[0].size();\n", - "\n", - " if (colsA != rowsB) {\n", - " throw std::runtime_error(\"Matrix dims do not match for multiplication.\");\n", - " }\n", - "\n", - " std::vector h_a(rowsA * colsA);\n", - " std::vector h_b(rowsB * colsB);\n", - "\n", - " for (int i = 0; i < rowsA; i++) {\n", - " for (int j = 0; j < colsA; j++) {\n", - " h_a[i * colsA + j] = a[i][j];\n", - " }\n", - " }\n", - "\n", - " for (int i = 0; i < rowsB; i++) {\n", - " for (int j = 0; j < colsB; j++) {\n", - " h_b[i * colsB + j] = b[i][j];\n", - " }\n", - " }\n", - "\n", - " double 
*d_a, *d_b, *d_result;\n", - " \n", - " cudaMalloc((void**)&d_a, rowsA * colsA * sizeof(double));\n", - " cudaMalloc((void**)&d_b, rowsB * colsB * sizeof(double));\n", - " cudaMalloc((void**)&d_result, rowsA * colsB * sizeof(double));\n", - "\n", - " cudaMemcpy(d_a, h_a.data(), rowsA * colsA * sizeof(double), cudaMemcpyHostToDevice);\n", - " cudaMemcpy(d_b, h_b.data(), rowsB * colsB * sizeof(double), cudaMemcpyHostToDevice);\n", - "\n", - " dim3 blockSize(16, 16);\n", - " dim3 gridSize((colsB + blockSize.x - 1) / blockSize.x, (rowsA + blockSize.y - 1) / blockSize.y);\n", - "\n", - " matmulKernel<<>>(d_a, d_b, d_result, rowsA, colsA, colsB);\n", - "\n", - " std::vector h_result(rowsA * colsB);\n", - " cudaMemcpy(h_result.data(), d_result, rowsA * colsB * sizeof(double), cudaMemcpyDeviceToHost);\n", - "\n", - " cudaFree(d_a);\n", - " cudaFree(d_b);\n", - " cudaFree(d_result);\n", - " \n", - " std::vector> result(rowsA, std::vector(colsB));\n", - " for (int i = 0; i < rowsA; i++) {\n", - " for (int j = 0; j < colsB; j++) {\n", - " result[i][j] = h_result[i * colsB + j];\n", - " }\n", - " }\n", - "\n", - " return result;\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "7295e929-dcfc-43be-9b3b-82e9f9fbf9db", - "metadata": {}, - "outputs": [], - "source": [ - "std::vector vecsubCUDA(const std::vector& a, const std::vector& b) {\n", - " int len = a.size();\n", - "\n", - " double* d_a;\n", - " double* d_b;\n", - " double* d_result;\n", - "\n", - " cudaMalloc((void**)&d_a, len * sizeof(double));\n", - " cudaMalloc((void**)&d_b, len * sizeof(double));\n", - " cudaMalloc((void**)&d_result, len * sizeof(double));\n", - "\n", - " cudaMemcpy(d_a, a.data(), len * sizeof(double), cudaMemcpyHostToDevice);\n", - " cudaMemcpy(d_b, b.data(), len * sizeof(double), cudaMemcpyHostToDevice);\n", - "\n", - " int blockSize = 256; \n", - " int gridSize = (len + blockSize - 1) / blockSize;\n", - "\n", - " vecsubKernel<<>>(d_a, d_b, d_result, len);\n", - "\n", - " std::vector result(len);\n", - " cudaMemcpy(result.data(), d_result, len * sizeof(double), cudaMemcpyDeviceToHost);\n", - "\n", - " cudaFree(d_a);\n", - " cudaFree(d_b);\n", - " cudaFree(d_result);\n", - "\n", - " return result;\n", - "}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "84f29c1e-d4c2-4680-a579-14d63a2bde08", - "metadata": {}, - "outputs": [], - "source": [ - "void printMatrix(const std::vector>& matrix) {\n", - " for (size_t i = 0; i < matrix.size(); i++) {\n", - " for (size_t j = 0; j < matrix[i].size(); j++) {\n", - " std::cout << matrix[i][j] << \" \";\n", - " }\n", - " std::cout << std::endl;\n", - " }\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "6d718c47-3bc6-4c7a-893c-4fedc399926d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Result matrix:\n", - "11 14 17 20 \n", - "23 30 37 44 \n", - "35 46 57 68 \n" - ] - } - ], - "source": [ - "// test for matrix matrix multiplication using CUDA\n", - "std::vector> matrixA = {\n", - " {1.0, 2.0},\n", - " {3.0, 4.0},\n", - " {5.0, 6.0}\n", - " };\n", - "\n", - " std::vector> matrixB = {\n", - " {1.0, 2.0, 3.0, 4.0},\n", - " {5.0, 6.0, 7.0, 8.0}\n", - " };\n", - "\n", - " std::vector> result = matmulCUDA(matrixA, matrixB);\n", - "\n", - " // Print the result\n", - " std::cout << \"Result matrix:\" << std::endl;\n", - " printMatrix(result);" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "f4c9238f-674a-4cf8-8128-840cc7f61e0f", - 
"metadata": {}, - "outputs": [], - "source": [ - "//set up class and constructor\n", - "KalmanFilter::KalmanFilter(\n", - " double dt,\n", - " const std::vector>& A,\n", - " const std::vector>& C,\n", - " const std::vector>& Q,\n", - " const std::vector>& R,\n", - " const std::vector>& P)\n", - " : A(A), C(C), Q(Q), R(R), P0(P),\n", - " m(C.size()), n(A.size()), dt(dt), initialized(false),\n", - " I(n, std::vector(n)), x_hat(n), x_hat_new(n)\n", - "{\n", - " for (int i = 0; i < n; i++) {\n", - " I[i][i] = 1.0;\n", - " }\n", - "}\n", - "\n", - "KalmanFilter::KalmanFilter() {}" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "bd953ec6-ceed-489c-8e33-8eb7ae6bb32f", - "metadata": {}, - "outputs": [], - "source": [ - "// init the kf states + measurements \n", - "void KalmanFilter::init(double t0, const std::vector& x0) {\n", - " x_hat = x0;\n", - " P = P0;\n", - " this->t0 = t0;\n", - " t = t0;\n", - " initialized = true;\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "2840f0f7-52de-4f47-a9dc-e257235df002", - "metadata": {}, - "outputs": [], - "source": [ - "void KalmanFilter::init() {\n", - " std::fill(x_hat.begin(), x_hat.end(), 0.0);\n", - " P = P0;\n", - " t0 = 0;\n", - " t = t0;\n", - " initialized = true;\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "ea057f20-9097-4dd3-946e-ab7a383afe69", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "void KalmanFilter::update(const std::vector& y) {\n", - " if (!initialized)\n", - " throw std::runtime_error(\"Filter is not initialized!\");\n", - "\n", - " x_hat_new = matvecmulCUDA(A, x_hat);\n", - "\n", - " P = mataddCUDA(matmulCUDA(matmulCUDA(A, P), mattransposeCUDA(A)), Q);\n", - "\n", - " std::vector> inv = matinverse(mataddCUDA(matmulCUDA(matmulCUDA(C, P), mattransposeCUDA(C)), R));\n", - "\n", - " K = matmulCUDA(matmulCUDA(P, mattransposeCUDA(C)), inv);\n", - " std::vector temp = matvecmulCUDA(C, x_hat_new);\n", - " std::vector difference = vecsubCUDA(y, temp);\n", - "\n", - " for (size_t i = 0; i < x_hat_new.size(); i++) {\n", - " x_hat_new[i] += matvecmulCUDA(K, difference)[i];\n", - " }\n", - "\n", - " P = matmulCUDA(matsubCUDA(I, matmulCUDA(K, C)), P);\n", - "\n", - " x_hat = x_hat_new;\n", - " t += dt;\n", - "}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "08d28163-a086-46a9-bf41-044f48530929", - "metadata": {}, - "outputs": [], - "source": [ - "void KalmanFilter::update(const std::vector& y, double dt, const std::vector>& A) {\n", - " this->A = A;\n", - " this->dt = dt;\n", - " update(y);\n", - "}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "79ed17e5-e20a-4155-849b-41b2b9279b9f", - "metadata": {}, - "outputs": [], - "source": [ - "std::vector generateExpandedData(const std::vector& baseData, int repetitions) {\n", - " std::vector expandedData;\n", - " for (int r = 0; r < repetitions; r++) {\n", - " for (auto val : baseData) {\n", - " // Add slight random perturbation to the data.\n", - " double perturbedValue = val + ((double)rand() / RAND_MAX - 0.5);\n", - " expandedData.push_back(perturbedValue);\n", - " }\n", - " }\n", - " return expandedData;\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "8938a288-fc2c-4104-82c6-added4466c8c", - "metadata": {}, - "outputs": [], - "source": [ - "int run_kf(int expansion_factor, bool verbose) {\n", - " \n", - " int n = 3; // Number of states\n", - " int m = 1; // Number of measurements\n", - "\n", - " double dt = 
1.0 / 30; // Time step\n", - " \n", - " std::vector> A(n, std::vector(n));\n", - " std::vector> C(m, std::vector(n));\n", - " std::vector> Q(n, std::vector(n));\n", - " std::vector> R(m, std::vector(m));\n", - " std::vector> P(n, std::vector(n));\n", - " \n", - " A = {{1, dt, 0}, {0, 1, dt}, {0, 0, 1}};\n", - " C = {{1, 0, 0}};\n", - " Q = {{.05, .05, .0}, {.05, .05, .0}, {.0, .0, .0}};\n", - " R = {{5}};\n", - " P = {{.1, .1, .1}, {.1, 10000, 10}, {.1, 10, 100}};\n", - " \n", - " KalmanFilter kf(dt, A, C, Q, R, P);\n", - " \n", - " std::vector measurements = {\n", - " 1.04202710058, 1.10726790452, 1.2913511148, 1.48485250951, 1.72825901034,\n", - " 1.74216489744, 2.11672039768, 2.14529225112, 2.16029641405, 2.21269371128,\n", - " 2.57709350237, 2.6682215744, 2.51641839428, 2.76034056782, 2.88131780617,\n", - " 2.88373786518, 2.9448468727, 2.82866600131, 3.0006601946, 3.12920591669,\n", - " 2.858361783, 2.83808170354, 2.68975330958, 2.66533185589, 2.81613499531,\n", - " 2.81003612051, 2.88321849354, 2.69789264832, 2.4342229249, 2.23464791825,\n", - " 2.30278776224, 2.02069770395, 1.94393985809, 1.82498398739, 1.52526230354,\n", - " 1.86967808173, 1.18073207847, 1.10729605087, 0.916168349913, 0.678547664519,\n", - " 0.562381751596, 0.355468474885, -0.155607486619, -0.287198661013, -0.602973173813\n", - " };\n", - " \n", - " std::vector expandedMeasurements = generateExpandedData(measurements, expansion_factor);\n", - " \n", - " std::vector x0 = {expandedMeasurements[0], 0, -9.81};\n", - " kf.init(0, x0);\n", - "\n", - " // Feed measurements into filter, output estimated states\n", - " std::vector y(m);\n", - " if(verbose) {\n", - " std::cout << \"t = \" << 0 << \", \" << \"x_hat[0]: \";\n", - " for (auto& val : kf.state()) std::cout << val << \" \";\n", - " std::cout << std::endl;\n", - " }\n", - " \n", - " int i;\n", - " for (i = 0; i < measurements.size(); i++) {\n", - " y[0] = measurements[i];\n", - " kf.update(y);\n", - " if(verbose) {\n", - " std::cout << \"t = \" << (i + 1) * dt << \", y[\" << i << \"] = \" << y[0] << \", x_hat[\" << i << \"] = \";\n", - " for (auto& val : kf.state()) std::cout << val << \" \";\n", - " std::cout << std::endl;\n", - " }\n", - " }\n", - " std::cout << std::endl;\n", - " std::cout<<\"Exec Success, Final kf states:\";\n", - " for (auto& val : kf.state()) std::cout << val << \" \";\n", - " std::cout << std::endl;\n", - " return 0;\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "7ea068c2-fc1f-4ad4-a2c9-aecd66cbbf71", - "metadata": {}, - "outputs": [], - "source": [ - "#include \n", - "auto start = std::chrono::high_resolution_clock::now();\n", - "auto stop = std::chrono::high_resolution_clock::now();\n", - "auto duration = std::chrono::duration_cast(stop - start);" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "1e1f74bf-1118-421f-a39f-00ce72db9923", - "metadata": {}, - "outputs": [], - "source": [ - "int pyrun_sim(int exp_factor, bool verbose) {\n", - " start = std::chrono::high_resolution_clock::now();\n", - " run_kf(exp_factor, verbose);\n", - " stop = std::chrono::high_resolution_clock::now();\n", - " duration = std::chrono::duration_cast(stop - start);\n", - " return duration.count();\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "7a910405-c4ff-41fa-aec5-69aa24598f9c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "t = 0, x_hat[0]: 0.737686 0 -9.81 \n", - "t = 0.0333333, y[0] = 1.04203, x_hat[0] = 0.948486 5.91215 
-9.80189 \n", - "t = 0.0666667, y[1] = 1.10727, x_hat[1] = 1.11742 5.16313 -9.80246 \n", - "t = 0.1, y[2] = 1.29135, x_hat[2] = 1.29067 4.8479 -9.80243 \n", - "t = 0.133333, y[3] = 1.48485, x_hat[3] = 1.46956 4.65232 -9.8015 \n", - "t = 0.166667, y[4] = 1.72826, x_hat[4] = 1.67214 4.61508 -9.79665 \n", - "t = 0.2, y[5] = 1.74216, x_hat[5] = 1.79217 4.11586 -9.80259 \n", - "t = 0.233333, y[6] = 2.11672, x_hat[6] = 1.99697 4.08721 -9.78381 \n", - "t = 0.266667, y[7] = 2.14529, x_hat[7] = 2.13716 3.7765 -9.78219 \n", - "t = 0.3, y[8] = 2.1603, x_hat[8] = 2.23217 3.34189 -9.80002 \n", - "t = 0.333333, y[9] = 2.21269, x_hat[9] = 2.30708 2.89745 -9.82839 \n", - "t = 0.366667, y[10] = 2.57709, x_hat[10] = 2.44897 2.70633 -9.78267 \n", - "t = 0.4, y[11] = 2.66822, x_hat[11] = 2.57104 2.47129 -9.74221 \n", - "t = 0.433333, y[12] = 2.51642, x_hat[12] = 2.62117 2.05785 -9.79233 \n", - "t = 0.466667, y[13] = 2.76034, x_hat[13] = 2.70573 1.77429 -9.76272 \n", - "t = 0.5, y[14] = 2.88132, x_hat[14] = 2.79035 1.51648 -9.70754 \n", - "t = 0.533333, y[15] = 2.88374, x_hat[15] = 2.85003 1.21709 -9.68495 \n", - "t = 0.566667, y[16] = 2.94485, x_hat[16] = 2.90195 0.924505 -9.65353 \n", - "t = 0.6, y[17] = 2.82867, x_hat[17] = 2.91129 0.544786 -9.7189 \n", - "t = 0.633333, y[18] = 3.00066, x_hat[18] = 2.94401 0.260709 -9.67101 \n", - "t = 0.666667, y[19] = 3.12921, x_hat[19] = 2.98862 0.0383847 -9.54536 \n", - "t = 0.7, y[20] = 2.85836, x_hat[20] = 2.96317 -0.355458 -9.64337 \n", - "t = 0.733333, y[21] = 2.83808, x_hat[21] = 2.92828 -0.743058 -9.73076 \n", - "t = 0.766667, y[22] = 2.68975, x_hat[22] = 2.85989 -1.19415 -9.89987 \n", - "t = 0.8, y[23] = 2.66533, x_hat[23] = 2.7884 -1.61706 -10.0242 \n", - "t = 0.833333, y[24] = 2.81613, x_hat[24] = 2.75128 -1.9017 -9.95817 \n", - "t = 0.866667, y[25] = 2.81004, x_hat[25] = 2.71309 -2.1591 -9.85946 \n", - "t = 0.9, y[26] = 2.88322, x_hat[26] = 2.69122 -2.3396 -9.66543 \n", - "t = 0.933333, y[27] = 2.69789, x_hat[27] = 2.63079 -2.61001 -9.59857 \n", - "t = 0.966667, y[28] = 2.43422, x_hat[28] = 2.52104 -2.99666 -9.68334 \n", - "t = 1, y[29] = 2.23465, x_hat[29] = 2.38245 -3.43211 -9.82404 \n", - "t = 1.03333, y[30] = 2.30279, x_hat[30] = 2.27525 -3.73881 -9.7986 \n", - "t = 1.06667, y[31] = 2.0207, x_hat[31] = 2.12376 -4.14204 -9.89064 \n", - "t = 1.1, y[32] = 1.94394, x_hat[32] = 1.97709 -4.49596 -9.91916 \n", - "t = 1.13333, y[33] = 1.82498, x_hat[33] = 1.82676 -4.82788 -9.92063 \n", - "t = 1.16667, y[34] = 1.52526, x_hat[34] = 1.63716 -5.23716 -10.0093 \n", - "t = 1.2, y[35] = 1.86968, x_hat[35] = 1.54514 -5.34796 -9.76318 \n", - "t = 1.23333, y[36] = 1.18073, x_hat[36] = 1.32938 -5.77305 -9.8709 \n", - "t = 1.26667, y[37] = 1.1073, x_hat[37] = 1.13101 -6.11758 -9.88731 \n", - "t = 1.3, y[38] = 0.916168, x_hat[38] = 0.924926 -6.45274 -9.89309 \n", - "t = 1.33333, y[39] = 0.678548, x_hat[39] = 0.703675 -6.7981 -9.90889 \n", - "t = 1.36667, y[40] = 0.562382, x_hat[40] = 0.493728 -7.08693 -9.86774 \n", - "t = 1.4, y[41] = 0.355468, x_hat[41] = 0.276462 -7.36943 -9.82262 \n", - "t = 1.43333, y[42] = -0.155607, x_hat[42] = -0.00496317 -7.78298 -9.9046 \n", - "t = 1.46667, y[43] = -0.287199, x_hat[43] = -0.268734 -8.1234 -9.91418 \n", - "t = 1.5, y[44] = -0.602973, x_hat[44] = -0.551482 -8.48174 -9.93964 \n", - "\n", - "Exec Success, Final kf states:-0.551482 -8.48174 -9.93964 \n" - ] - } - ], - "source": [ - "pyrun_sim(10, true)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "76110101-38eb-4100-820e-8407ba943ce0", - "metadata": {}, - 
"outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Exec Success, Final kf states:-0.485139 -7.98525 -9.02124 \n", - "\n", - "Exec Success, Final kf states:-0.436986 -7.62488 -8.35466 \n", - "\n", - "Exec Success, Final kf states:-0.516416 -8.21932 -9.45422 \n", - "\n", - "Exec Success, Final kf states:-0.476838 -7.92312 -8.90633 \n", - "\n", - "Exec Success, Final kf states:-0.547245 -8.45003 -9.88098 \n" - ] - } - ], - "source": [ - "%%python\n", - "\n", - "expand_factor = [500000, 1000000, 10000000, 20000000, 40000000]\n", - "benchmarks = []\n", - "\n", - "for i in expand_factor:\n", - " time_kf = cppyy.gbl.pyrun_sim(int(i), 0)\n", - " benchmarks.append(time_kf)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "2a47ecf6-f4e9-44b6-b72c-a935ed02fdc1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[880, 1584, 13726, 27178, 54014]\n" - ] - } - ], - "source": [ - "%%python\n", - "\n", - "print(benchmarks)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "CUDA (C++17)", - "language": "CUDA", - "name": "cuda-xcpp17" - }, - "language_info": { - "codemirror_mode": "text/x-c++src", - "file_extension": ".cpp", - "mimetype": "text/x-c++src", - "name": "c++", - "version": "17" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}