diff --git a/demos/CUDA/BlackScholes/BlackScholes.cu b/demos/CUDA/BlackScholes/BlackScholes.cu
index 7f6ddcf44..5e6df2cca 100644
--- a/demos/CUDA/BlackScholes/BlackScholes.cu
+++ b/demos/CUDA/BlackScholes/BlackScholes.cu
@@ -194,7 +194,7 @@ double computeL1norm_Call(float* S, float* X, float* T, float* d, Greek greek) {
   return sum_delta / sum_ref;
 }
 
-double computeL1norm_Pull(float* S, float* X, float* T, float* d, Greek greek) {
+double computeL1norm_Put(float* S, float* X, float* T, float* d, Greek greek) {
   double delta, ref, sum_delta, sum_ref;
   sum_delta = 0;
   sum_ref = 0;
@@ -337,7 +337,7 @@ int main(int argc, char** argv) {
     return EXIT_FAILURE;
   }
 
-  // Compute the derivatives of the price of the pull options
+  // Compute the derivatives of the price of the Put options
   for (int i = 0; i < OPT_N; i++) {
     h_CallResultCPU[i] = 0.0f;
     h_PutResultCPU[i] = -1.0f;
@@ -356,34 +356,34 @@ int main(int argc, char** argv) {
                   d_PutResultGPU, d_StockPrice, d_OptionStrike, d_OptionYears);
 
   // Verify delta
-  L1norm = computeL1norm_Pull(h_StockPrice, h_OptionStrike, h_OptionYears,
-                              d_StockPrice, Delta);
-  printf("L1norm of delta for Pull option = %E\n", L1norm);
+  L1norm = computeL1norm_Put(h_StockPrice, h_OptionStrike, h_OptionYears,
+                             d_StockPrice, Delta);
+  printf("L1norm of delta for Put option = %E\n", L1norm);
   if (L1norm > 1e-5) {
     printf("Gradient test failed: the difference between the computed and "
-           "the approximated theoretical delta for Pull option is larger than "
+           "the approximated theoretical delta for Put option is larger than "
            "expected\n");
     return EXIT_FAILURE;
   }
 
   // Verify derivatives with respect to the Strike price
-  L1norm = computeL1norm_Pull(h_StockPrice, h_OptionStrike, h_OptionYears,
-                              d_OptionStrike, dX);
-  printf("L1norm of derivative of Pull w.r.t. the strike price = %E\n", L1norm);
+  L1norm = computeL1norm_Put(h_StockPrice, h_OptionStrike, h_OptionYears,
+                             d_OptionStrike, dX);
+  printf("L1norm of derivative of Put w.r.t. the strike price = %E\n", L1norm);
   if (L1norm > 1e-6) {
     printf("Gradient test failed: the difference between the computed and the "
            "approximated theoretcial derivative of "
-           "PUll w.r.t. the strike price is larger than expected\n");
+           "Put w.r.t. the strike price is larger than expected\n");
     return EXIT_FAILURE;
   }
 
   // Verify theta
-  L1norm = computeL1norm_Pull(h_StockPrice, h_OptionStrike, h_OptionYears,
-                              d_OptionYears, Theta);
-  printf("L1norm of theta for Pull option = %E\n", L1norm);
+  L1norm = computeL1norm_Put(h_StockPrice, h_OptionStrike, h_OptionYears,
+                             d_OptionYears, Theta);
+  printf("L1norm of theta for Put option = %E\n", L1norm);
   if (L1norm > 1e-5) {
     printf("Gradient test failed: the difference between the computed and the "
-           "approximated theoretical theta for Pull option is larger than "
+           "approximated theoretical theta for Put option is larger than "
            "expected\n");
     return EXIT_FAILURE;
   }
diff --git a/demos/CUDA/BlackScholes/BlackScholes_gold.cpp b/demos/CUDA/BlackScholes/BlackScholes_gold.cpp
index b2fee109f..6b0d9c8ef 100644
--- a/demos/CUDA/BlackScholes/BlackScholes_gold.cpp
+++ b/demos/CUDA/BlackScholes/BlackScholes_gold.cpp
@@ -49,7 +49,8 @@ extern "C" double CND(double d) {
   double cnd = RSQRT2PI * exp(-0.5 * d * d) *
                (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))));
 
-  if (d > 0) cnd = 1.0 - cnd;
+  if (d > 0)
+    cnd = 1.0 - cnd;
 
   return cnd;
 }
@@ -57,13 +58,13 @@ extern "C" double CND(double d) {
 ////////////////////////////////////////////////////////////////////////////////
 // Black-Scholes formula for both call and put
 ////////////////////////////////////////////////////////////////////////////////
-static void BlackScholesBodyCPU(float &callResult, float &putResult,
-                                float Sf,  // Stock price
-                                float Xf,  // Option strike
-                                float Tf,  // Option years
-                                float Rf,  // Riskless rate
+static void BlackScholesBodyCPU(float& callResult, float& putResult,
+                                float Sf, // Stock price
+                                float Xf, // Option strike
+                                float Tf, // Option years
+                                float Rf, // Riskless rate
                                 float Vf  // Volatility rate
-                                ) {
+) {
   double S = Sf, X = Xf, T = Tf, R = Rf, V = Vf;
 
   double sqrtT = sqrt(T);
@@ -81,9 +82,9 @@ static void BlackScholesBodyCPU(float &callResult, float &putResult,
 ////////////////////////////////////////////////////////////////////////////////
 // Process an array of optN options
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" void BlackScholesCPU(float *h_CallResult, float *h_PutResult,
-                                float *h_StockPrice, float *h_OptionStrike,
-                                float *h_OptionYears, float Riskfree,
+extern "C" void BlackScholesCPU(float* h_CallResult, float* h_PutResult,
+                                float* h_StockPrice, float* h_OptionStrike,
+                                float* h_OptionYears, float Riskfree,
                                 float Volatility, int optN) {
   for (int opt = 0; opt < optN; opt++)
     BlackScholesBodyCPU(h_CallResult[opt], h_PutResult[opt], h_StockPrice[opt],
diff --git a/demos/CUDA/BlackScholes/BlackScholes_kernel.cuh b/demos/CUDA/BlackScholes/BlackScholes_kernel.cuh
index dfcf5c575..26497b8ac 100644
--- a/demos/CUDA/BlackScholes/BlackScholes_kernel.cuh
+++ b/demos/CUDA/BlackScholes/BlackScholes_kernel.cuh
@@ -41,7 +41,8 @@ __device__ inline float cndGPU(float d) {
   float cnd = RSQRT2PI * expf(-0.5f * d * d) *
               (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))));
 
-  if (d > 0) cnd = 1.0f - cnd;
+  if (d > 0)
+    cnd = 1.0f - cnd;
 
   return cnd;
 }
@@ -49,13 +50,13 @@ __device__ inline float cndGPU(float d) {
 ////////////////////////////////////////////////////////////////////////////////
 // Black-Scholes formula for both call and put
 ////////////////////////////////////////////////////////////////////////////////
-__device__ inline void BlackScholesBodyGPU(float &CallResult, float &PutResult,
-                                           float S,  // Stock price
-                                           float X,  // Option strike
-                                           float T,  // Option years
-                                           float R,  // Riskless rate
+__device__ inline void BlackScholesBodyGPU(float& CallResult, float& PutResult,
+                                           float S, // Stock price
+                                           float X, // Option strike
+                                           float T, // Option years
+                                           float R, // Riskless rate
                                            float V  // Volatility rate
-                                           ) {
+) {
   float sqrtT, expRT;
   float d1, d2, CNDD1, CNDD2;
 
diff --git a/demos/CUDA/BlackScholes/helper/helper_cuda.h b/demos/CUDA/BlackScholes/helper/helper_cuda.h
index 22d8eeaa2..6666e8208 100644
--- a/demos/CUDA/BlackScholes/helper/helper_cuda.h
+++ b/demos/CUDA/BlackScholes/helper/helper_cuda.h
@@ -50,16 +50,16 @@
 
 // CUDA Runtime error messages
 #ifdef __DRIVER_TYPES_H__
-static const char *_cudaGetErrorEnum(cudaError_t error) {
+static const char* _cudaGetErrorEnum(cudaError_t error) {
   return cudaGetErrorName(error);
 }
 #endif
 
 #ifdef CUDA_DRIVER_API
 // CUDA Driver API errors
-static const char *_cudaGetErrorEnum(CUresult error) {
+static const char* _cudaGetErrorEnum(CUresult error) {
   static char unknown[] = "<unknown>";
-  const char *ret = NULL;
+  const char* ret = NULL;
   cuGetErrorName(error, &ret);
   return ret ? ret : unknown;
 }
@@ -67,37 +67,37 @@ static const char *_cudaGetErrorEnum(CUresult error) {
 
 #ifdef CUBLAS_API_H_
 // cuBLAS API errors
-static const char *_cudaGetErrorEnum(cublasStatus_t error) {
+static const char* _cudaGetErrorEnum(cublasStatus_t error) {
   switch (error) {
-    case CUBLAS_STATUS_SUCCESS:
-      return "CUBLAS_STATUS_SUCCESS";
+  case CUBLAS_STATUS_SUCCESS:
+    return "CUBLAS_STATUS_SUCCESS";
 
-    case CUBLAS_STATUS_NOT_INITIALIZED:
-      return "CUBLAS_STATUS_NOT_INITIALIZED";
+  case CUBLAS_STATUS_NOT_INITIALIZED:
+    return "CUBLAS_STATUS_NOT_INITIALIZED";
 
-    case CUBLAS_STATUS_ALLOC_FAILED:
-      return "CUBLAS_STATUS_ALLOC_FAILED";
+  case CUBLAS_STATUS_ALLOC_FAILED:
+    return "CUBLAS_STATUS_ALLOC_FAILED";
 
-    case CUBLAS_STATUS_INVALID_VALUE:
-      return "CUBLAS_STATUS_INVALID_VALUE";
+  case CUBLAS_STATUS_INVALID_VALUE:
+    return "CUBLAS_STATUS_INVALID_VALUE";
 
-    case CUBLAS_STATUS_ARCH_MISMATCH:
-      return "CUBLAS_STATUS_ARCH_MISMATCH";
+  case CUBLAS_STATUS_ARCH_MISMATCH:
+    return "CUBLAS_STATUS_ARCH_MISMATCH";
 
-    case CUBLAS_STATUS_MAPPING_ERROR:
-      return "CUBLAS_STATUS_MAPPING_ERROR";
+  case CUBLAS_STATUS_MAPPING_ERROR:
+    return "CUBLAS_STATUS_MAPPING_ERROR";
 
-    case CUBLAS_STATUS_EXECUTION_FAILED:
-      return "CUBLAS_STATUS_EXECUTION_FAILED";
+  case CUBLAS_STATUS_EXECUTION_FAILED:
+    return "CUBLAS_STATUS_EXECUTION_FAILED";
 
-    case CUBLAS_STATUS_INTERNAL_ERROR:
-      return "CUBLAS_STATUS_INTERNAL_ERROR";
+  case CUBLAS_STATUS_INTERNAL_ERROR:
+    return "CUBLAS_STATUS_INTERNAL_ERROR";
 
-    case CUBLAS_STATUS_NOT_SUPPORTED:
-      return "CUBLAS_STATUS_NOT_SUPPORTED";
+  case CUBLAS_STATUS_NOT_SUPPORTED:
+    return "CUBLAS_STATUS_NOT_SUPPORTED";
 
-    case CUBLAS_STATUS_LICENSE_ERROR:
-      return "CUBLAS_STATUS_LICENSE_ERROR";
+  case CUBLAS_STATUS_LICENSE_ERROR:
+    return "CUBLAS_STATUS_LICENSE_ERROR";
   }
 
   return "<unknown>";
@@ -106,58 +106,58 @@ static const char *_cudaGetErrorEnum(cublasStatus_t error) {
 
 #ifdef _CUFFT_H_
 // cuFFT API errors
-static const char *_cudaGetErrorEnum(cufftResult error) {
+static const char* _cudaGetErrorEnum(cufftResult error) {
   switch (error) {
-    case CUFFT_SUCCESS:
-      return "CUFFT_SUCCESS";
+  case CUFFT_SUCCESS:
+    return "CUFFT_SUCCESS";
 
-    case CUFFT_INVALID_PLAN:
-      return "CUFFT_INVALID_PLAN";
+  case CUFFT_INVALID_PLAN:
+    return "CUFFT_INVALID_PLAN";
 
-    case CUFFT_ALLOC_FAILED:
-      return "CUFFT_ALLOC_FAILED";
+  case CUFFT_ALLOC_FAILED:
+    return "CUFFT_ALLOC_FAILED";
 
-    case CUFFT_INVALID_TYPE:
-      return "CUFFT_INVALID_TYPE";
+  case CUFFT_INVALID_TYPE:
+    return "CUFFT_INVALID_TYPE";
 
-    case CUFFT_INVALID_VALUE:
-      return "CUFFT_INVALID_VALUE";
+  case CUFFT_INVALID_VALUE:
+    return "CUFFT_INVALID_VALUE";
 
-    case CUFFT_INTERNAL_ERROR:
-      return "CUFFT_INTERNAL_ERROR";
+  case CUFFT_INTERNAL_ERROR:
+    return "CUFFT_INTERNAL_ERROR";
 
-    case CUFFT_EXEC_FAILED:
-      return "CUFFT_EXEC_FAILED";
+  case CUFFT_EXEC_FAILED:
+    return "CUFFT_EXEC_FAILED";
 
-    case CUFFT_SETUP_FAILED:
-      return "CUFFT_SETUP_FAILED";
+  case CUFFT_SETUP_FAILED:
+    return "CUFFT_SETUP_FAILED";
 
-    case CUFFT_INVALID_SIZE:
-      return "CUFFT_INVALID_SIZE";
+  case CUFFT_INVALID_SIZE:
+    return "CUFFT_INVALID_SIZE";
 
-    case CUFFT_UNALIGNED_DATA:
-      return "CUFFT_UNALIGNED_DATA";
+  case CUFFT_UNALIGNED_DATA:
+    return "CUFFT_UNALIGNED_DATA";
 
-    case CUFFT_INCOMPLETE_PARAMETER_LIST:
-      return "CUFFT_INCOMPLETE_PARAMETER_LIST";
+  case CUFFT_INCOMPLETE_PARAMETER_LIST:
+    return "CUFFT_INCOMPLETE_PARAMETER_LIST";
 
-    case CUFFT_INVALID_DEVICE:
-      return "CUFFT_INVALID_DEVICE";
+  case CUFFT_INVALID_DEVICE:
+    return "CUFFT_INVALID_DEVICE";
 
-    case CUFFT_PARSE_ERROR:
-      return "CUFFT_PARSE_ERROR";
+  case CUFFT_PARSE_ERROR:
+    return "CUFFT_PARSE_ERROR";
 
-    case CUFFT_NO_WORKSPACE:
-      return "CUFFT_NO_WORKSPACE";
+  case CUFFT_NO_WORKSPACE:
+    return "CUFFT_NO_WORKSPACE";
 
-    case CUFFT_NOT_IMPLEMENTED:
-      return "CUFFT_NOT_IMPLEMENTED";
+  case CUFFT_NOT_IMPLEMENTED:
+    return "CUFFT_NOT_IMPLEMENTED";
 
-    case CUFFT_LICENSE_ERROR:
-      return "CUFFT_LICENSE_ERROR";
+  case CUFFT_LICENSE_ERROR:
+    return "CUFFT_LICENSE_ERROR";
 
-    case CUFFT_NOT_SUPPORTED:
-      return "CUFFT_NOT_SUPPORTED";
+  case CUFFT_NOT_SUPPORTED:
+    return "CUFFT_NOT_SUPPORTED";
   }
 
   return "<unknown>";
@@ -166,34 +166,34 @@ static const char *_cudaGetErrorEnum(cufftResult error) {
 
 #ifdef CUSPARSEAPI
 // cuSPARSE API errors
-static const char *_cudaGetErrorEnum(cusparseStatus_t error) {
+static const char* _cudaGetErrorEnum(cusparseStatus_t error) {
   switch (error) {
-    case CUSPARSE_STATUS_SUCCESS:
-      return "CUSPARSE_STATUS_SUCCESS";
+  case CUSPARSE_STATUS_SUCCESS:
+    return "CUSPARSE_STATUS_SUCCESS";
 
-    case CUSPARSE_STATUS_NOT_INITIALIZED:
-      return "CUSPARSE_STATUS_NOT_INITIALIZED";
+  case CUSPARSE_STATUS_NOT_INITIALIZED:
+    return "CUSPARSE_STATUS_NOT_INITIALIZED";
 
-    case CUSPARSE_STATUS_ALLOC_FAILED:
-      return "CUSPARSE_STATUS_ALLOC_FAILED";
+  case CUSPARSE_STATUS_ALLOC_FAILED:
+    return "CUSPARSE_STATUS_ALLOC_FAILED";
 
-    case CUSPARSE_STATUS_INVALID_VALUE:
-      return "CUSPARSE_STATUS_INVALID_VALUE";
+  case CUSPARSE_STATUS_INVALID_VALUE:
+    return "CUSPARSE_STATUS_INVALID_VALUE";
 
-    case CUSPARSE_STATUS_ARCH_MISMATCH:
-      return "CUSPARSE_STATUS_ARCH_MISMATCH";
+  case CUSPARSE_STATUS_ARCH_MISMATCH:
+    return "CUSPARSE_STATUS_ARCH_MISMATCH";
 
-    case CUSPARSE_STATUS_MAPPING_ERROR:
-      return "CUSPARSE_STATUS_MAPPING_ERROR";
+  case CUSPARSE_STATUS_MAPPING_ERROR:
+    return "CUSPARSE_STATUS_MAPPING_ERROR";
 
-    case CUSPARSE_STATUS_EXECUTION_FAILED:
-      return "CUSPARSE_STATUS_EXECUTION_FAILED";
+  case CUSPARSE_STATUS_EXECUTION_FAILED:
+    return "CUSPARSE_STATUS_EXECUTION_FAILED";
 
-    case CUSPARSE_STATUS_INTERNAL_ERROR:
-      return "CUSPARSE_STATUS_INTERNAL_ERROR";
+  case CUSPARSE_STATUS_INTERNAL_ERROR:
+    return "CUSPARSE_STATUS_INTERNAL_ERROR";
 
-    case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
-      return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
+  case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
+    return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
   }
 
   return "<unknown>";
@@ -202,32 +202,32 @@ static const char *_cudaGetErrorEnum(cusparseStatus_t error) {
 
 #ifdef CUSOLVER_COMMON_H_
 // cuSOLVER API errors
-static const char *_cudaGetErrorEnum(cusolverStatus_t error) {
+static const char* _cudaGetErrorEnum(cusolverStatus_t error) {
   switch (error) {
-    case CUSOLVER_STATUS_SUCCESS:
-      return "CUSOLVER_STATUS_SUCCESS";
-    case CUSOLVER_STATUS_NOT_INITIALIZED:
-      return "CUSOLVER_STATUS_NOT_INITIALIZED";
-    case CUSOLVER_STATUS_ALLOC_FAILED:
-      return "CUSOLVER_STATUS_ALLOC_FAILED";
-    case CUSOLVER_STATUS_INVALID_VALUE:
-      return "CUSOLVER_STATUS_INVALID_VALUE";
-    case CUSOLVER_STATUS_ARCH_MISMATCH:
-      return "CUSOLVER_STATUS_ARCH_MISMATCH";
-    case CUSOLVER_STATUS_MAPPING_ERROR:
-      return "CUSOLVER_STATUS_MAPPING_ERROR";
-    case CUSOLVER_STATUS_EXECUTION_FAILED:
-      return "CUSOLVER_STATUS_EXECUTION_FAILED";
-    case CUSOLVER_STATUS_INTERNAL_ERROR:
-      return "CUSOLVER_STATUS_INTERNAL_ERROR";
-    case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
-      return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
-    case CUSOLVER_STATUS_NOT_SUPPORTED:
-      return "CUSOLVER_STATUS_NOT_SUPPORTED ";
-    case CUSOLVER_STATUS_ZERO_PIVOT:
-      return "CUSOLVER_STATUS_ZERO_PIVOT";
-    case CUSOLVER_STATUS_INVALID_LICENSE:
-      return "CUSOLVER_STATUS_INVALID_LICENSE";
+  case CUSOLVER_STATUS_SUCCESS:
+    return "CUSOLVER_STATUS_SUCCESS";
+  case CUSOLVER_STATUS_NOT_INITIALIZED:
+    return "CUSOLVER_STATUS_NOT_INITIALIZED";
+  case CUSOLVER_STATUS_ALLOC_FAILED:
+    return "CUSOLVER_STATUS_ALLOC_FAILED";
+  case CUSOLVER_STATUS_INVALID_VALUE:
+    return "CUSOLVER_STATUS_INVALID_VALUE";
+  case CUSOLVER_STATUS_ARCH_MISMATCH:
+    return "CUSOLVER_STATUS_ARCH_MISMATCH";
+  case CUSOLVER_STATUS_MAPPING_ERROR:
+    return "CUSOLVER_STATUS_MAPPING_ERROR";
+  case CUSOLVER_STATUS_EXECUTION_FAILED:
+    return "CUSOLVER_STATUS_EXECUTION_FAILED";
+  case CUSOLVER_STATUS_INTERNAL_ERROR:
+    return "CUSOLVER_STATUS_INTERNAL_ERROR";
+  case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
+    return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
+  case CUSOLVER_STATUS_NOT_SUPPORTED:
+    return "CUSOLVER_STATUS_NOT_SUPPORTED ";
+  case CUSOLVER_STATUS_ZERO_PIVOT:
+    return "CUSOLVER_STATUS_ZERO_PIVOT";
+  case CUSOLVER_STATUS_INVALID_LICENSE:
+    return "CUSOLVER_STATUS_INVALID_LICENSE";
   }
 
   return "<unknown>";
@@ -236,46 +236,46 @@ static const char *_cudaGetErrorEnum(cusolverStatus_t error) {
 
 #ifdef CURAND_H_
 // cuRAND API errors
-static const char *_cudaGetErrorEnum(curandStatus_t error) {
+static const char* _cudaGetErrorEnum(curandStatus_t error) {
   switch (error) {
-    case CURAND_STATUS_SUCCESS:
-      return "CURAND_STATUS_SUCCESS";
+  case CURAND_STATUS_SUCCESS:
+    return "CURAND_STATUS_SUCCESS";
 
-    case CURAND_STATUS_VERSION_MISMATCH:
-      return "CURAND_STATUS_VERSION_MISMATCH";
+  case CURAND_STATUS_VERSION_MISMATCH:
+    return "CURAND_STATUS_VERSION_MISMATCH";
 
-    case CURAND_STATUS_NOT_INITIALIZED:
-      return "CURAND_STATUS_NOT_INITIALIZED";
+  case CURAND_STATUS_NOT_INITIALIZED:
+    return "CURAND_STATUS_NOT_INITIALIZED";
 
-    case CURAND_STATUS_ALLOCATION_FAILED:
-      return "CURAND_STATUS_ALLOCATION_FAILED";
+  case CURAND_STATUS_ALLOCATION_FAILED:
+    return "CURAND_STATUS_ALLOCATION_FAILED";
 
-    case CURAND_STATUS_TYPE_ERROR:
-      return "CURAND_STATUS_TYPE_ERROR";
+  case CURAND_STATUS_TYPE_ERROR:
+    return "CURAND_STATUS_TYPE_ERROR";
 
-    case CURAND_STATUS_OUT_OF_RANGE:
-      return "CURAND_STATUS_OUT_OF_RANGE";
+  case CURAND_STATUS_OUT_OF_RANGE:
+    return "CURAND_STATUS_OUT_OF_RANGE";
 
-    case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
-      return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
+  case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
+    return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
 
-    case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
-      return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
+  case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
+    return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
 
-    case CURAND_STATUS_LAUNCH_FAILURE:
-      return "CURAND_STATUS_LAUNCH_FAILURE";
+  case CURAND_STATUS_LAUNCH_FAILURE:
+    return "CURAND_STATUS_LAUNCH_FAILURE";
 
-    case CURAND_STATUS_PREEXISTING_FAILURE:
-      return "CURAND_STATUS_PREEXISTING_FAILURE";
+  case CURAND_STATUS_PREEXISTING_FAILURE:
+    return "CURAND_STATUS_PREEXISTING_FAILURE";
 
-    case CURAND_STATUS_INITIALIZATION_FAILED:
-      return "CURAND_STATUS_INITIALIZATION_FAILED";
+  case CURAND_STATUS_INITIALIZATION_FAILED:
+    return "CURAND_STATUS_INITIALIZATION_FAILED";
 
-    case CURAND_STATUS_ARCH_MISMATCH:
-      return "CURAND_STATUS_ARCH_MISMATCH";
+  case CURAND_STATUS_ARCH_MISMATCH:
+    return "CURAND_STATUS_ARCH_MISMATCH";
 
-    case CURAND_STATUS_INTERNAL_ERROR:
-      return "CURAND_STATUS_INTERNAL_ERROR";
+  case CURAND_STATUS_INTERNAL_ERROR:
+    return "CURAND_STATUS_INTERNAL_ERROR";
   }
 
   return "<unknown>";
@@ -284,34 +284,34 @@ static const char *_cudaGetErrorEnum(curandStatus_t error) {
 
 #ifdef NVJPEGAPI
 // nvJPEG API errors
-static const char *_cudaGetErrorEnum(nvjpegStatus_t error) {
+static const char* _cudaGetErrorEnum(nvjpegStatus_t error) {
   switch (error) {
-    case NVJPEG_STATUS_SUCCESS:
-      return "NVJPEG_STATUS_SUCCESS";
+  case NVJPEG_STATUS_SUCCESS:
+    return "NVJPEG_STATUS_SUCCESS";
 
-    case NVJPEG_STATUS_NOT_INITIALIZED:
-      return "NVJPEG_STATUS_NOT_INITIALIZED";
+  case NVJPEG_STATUS_NOT_INITIALIZED:
+    return "NVJPEG_STATUS_NOT_INITIALIZED";
 
-    case NVJPEG_STATUS_INVALID_PARAMETER:
-      return "NVJPEG_STATUS_INVALID_PARAMETER";
+  case NVJPEG_STATUS_INVALID_PARAMETER:
+    return "NVJPEG_STATUS_INVALID_PARAMETER";
 
-    case NVJPEG_STATUS_BAD_JPEG:
-      return "NVJPEG_STATUS_BAD_JPEG";
+  case NVJPEG_STATUS_BAD_JPEG:
+    return "NVJPEG_STATUS_BAD_JPEG";
 
-    case NVJPEG_STATUS_JPEG_NOT_SUPPORTED:
-      return "NVJPEG_STATUS_JPEG_NOT_SUPPORTED";
+  case NVJPEG_STATUS_JPEG_NOT_SUPPORTED:
+    return "NVJPEG_STATUS_JPEG_NOT_SUPPORTED";
 
-    case NVJPEG_STATUS_ALLOCATOR_FAILURE:
-      return "NVJPEG_STATUS_ALLOCATOR_FAILURE";
+  case NVJPEG_STATUS_ALLOCATOR_FAILURE:
+    return "NVJPEG_STATUS_ALLOCATOR_FAILURE";
 
-    case NVJPEG_STATUS_EXECUTION_FAILED:
-      return "NVJPEG_STATUS_EXECUTION_FAILED";
+  case NVJPEG_STATUS_EXECUTION_FAILED:
+    return "NVJPEG_STATUS_EXECUTION_FAILED";
 
-    case NVJPEG_STATUS_ARCH_MISMATCH:
-      return "NVJPEG_STATUS_ARCH_MISMATCH";
+  case NVJPEG_STATUS_ARCH_MISMATCH:
+    return "NVJPEG_STATUS_ARCH_MISMATCH";
 
-    case NVJPEG_STATUS_INTERNAL_ERROR:
-      return "NVJPEG_STATUS_INTERNAL_ERROR";
+  case NVJPEG_STATUS_INTERNAL_ERROR:
+    return "NVJPEG_STATUS_INTERNAL_ERROR";
   }
 
   return "<unknown>";
@@ -320,258 +320,258 @@ static const char *_cudaGetErrorEnum(nvjpegStatus_t error) {
 
 #ifdef NV_NPPIDEFS_H
 // NPP API errors
-static const char *_cudaGetErrorEnum(NppStatus error) {
+static const char* _cudaGetErrorEnum(NppStatus error) {
   switch (error) {
-    case NPP_NOT_SUPPORTED_MODE_ERROR:
-      return "NPP_NOT_SUPPORTED_MODE_ERROR";
+  case NPP_NOT_SUPPORTED_MODE_ERROR:
+    return "NPP_NOT_SUPPORTED_MODE_ERROR";
 
-    case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
-      return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
+  case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
+    return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
 
-    case NPP_RESIZE_NO_OPERATION_ERROR:
-      return "NPP_RESIZE_NO_OPERATION_ERROR";
+  case NPP_RESIZE_NO_OPERATION_ERROR:
+    return "NPP_RESIZE_NO_OPERATION_ERROR";
 
-    case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
-      return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
+  case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
+    return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
 
 #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
 
-    case NPP_BAD_ARG_ERROR:
-      return "NPP_BAD_ARGUMENT_ERROR";
+  case NPP_BAD_ARG_ERROR:
+    return "NPP_BAD_ARGUMENT_ERROR";
 
-    case NPP_COEFF_ERROR:
-      return "NPP_COEFFICIENT_ERROR";
+  case NPP_COEFF_ERROR:
+    return "NPP_COEFFICIENT_ERROR";
 
-    case NPP_RECT_ERROR:
-      return "NPP_RECTANGLE_ERROR";
+  case NPP_RECT_ERROR:
+    return "NPP_RECTANGLE_ERROR";
 
-    case NPP_QUAD_ERROR:
-      return "NPP_QUADRANGLE_ERROR";
+  case NPP_QUAD_ERROR:
+    return "NPP_QUADRANGLE_ERROR";
 
-    case NPP_MEM_ALLOC_ERR:
-      return "NPP_MEMORY_ALLOCATION_ERROR";
+  case NPP_MEM_ALLOC_ERR:
+    return "NPP_MEMORY_ALLOCATION_ERROR";
 
-    case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
-      return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
+  case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
+    return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
 
-    case NPP_INVALID_INPUT:
-      return "NPP_INVALID_INPUT";
+  case NPP_INVALID_INPUT:
+    return "NPP_INVALID_INPUT";
 
-    case NPP_POINTER_ERROR:
-      return "NPP_POINTER_ERROR";
+  case NPP_POINTER_ERROR:
+    return "NPP_POINTER_ERROR";
 
-    case NPP_WARNING:
-      return "NPP_WARNING";
+  case NPP_WARNING:
+    return "NPP_WARNING";
 
-    case NPP_ODD_ROI_WARNING:
-      return "NPP_ODD_ROI_WARNING";
+  case NPP_ODD_ROI_WARNING:
+    return "NPP_ODD_ROI_WARNING";
 #else
 
-    // These are for CUDA 5.5 or higher
-    case NPP_BAD_ARGUMENT_ERROR:
-      return "NPP_BAD_ARGUMENT_ERROR";
+  // These are for CUDA 5.5 or higher
+  case NPP_BAD_ARGUMENT_ERROR:
+    return "NPP_BAD_ARGUMENT_ERROR";
 
-    case NPP_COEFFICIENT_ERROR:
-      return "NPP_COEFFICIENT_ERROR";
+  case NPP_COEFFICIENT_ERROR:
+    return "NPP_COEFFICIENT_ERROR";
 
-    case NPP_RECTANGLE_ERROR:
-      return "NPP_RECTANGLE_ERROR";
+  case NPP_RECTANGLE_ERROR:
+    return "NPP_RECTANGLE_ERROR";
 
-    case NPP_QUADRANGLE_ERROR:
-      return "NPP_QUADRANGLE_ERROR";
+  case NPP_QUADRANGLE_ERROR:
+    return "NPP_QUADRANGLE_ERROR";
 
-    case NPP_MEMORY_ALLOCATION_ERR:
-      return "NPP_MEMORY_ALLOCATION_ERROR";
+  case NPP_MEMORY_ALLOCATION_ERR:
+    return "NPP_MEMORY_ALLOCATION_ERROR";
 
-    case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
-      return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
+  case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
+    return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
 
-    case NPP_INVALID_HOST_POINTER_ERROR:
-      return "NPP_INVALID_HOST_POINTER_ERROR";
+  case NPP_INVALID_HOST_POINTER_ERROR:
+    return "NPP_INVALID_HOST_POINTER_ERROR";
 
-    case NPP_INVALID_DEVICE_POINTER_ERROR:
-      return "NPP_INVALID_DEVICE_POINTER_ERROR";
+  case NPP_INVALID_DEVICE_POINTER_ERROR:
+    return "NPP_INVALID_DEVICE_POINTER_ERROR";
 #endif
 
-    case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
-      return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
+  case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
+    return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
 
-    case NPP_TEXTURE_BIND_ERROR:
-      return "NPP_TEXTURE_BIND_ERROR";
+  case NPP_TEXTURE_BIND_ERROR:
+    return "NPP_TEXTURE_BIND_ERROR";
 
-    case NPP_WRONG_INTERSECTION_ROI_ERROR:
-      return "NPP_WRONG_INTERSECTION_ROI_ERROR";
+  case NPP_WRONG_INTERSECTION_ROI_ERROR:
+    return "NPP_WRONG_INTERSECTION_ROI_ERROR";
 
-    case NPP_NOT_EVEN_STEP_ERROR:
-      return "NPP_NOT_EVEN_STEP_ERROR";
+  case NPP_NOT_EVEN_STEP_ERROR:
+    return "NPP_NOT_EVEN_STEP_ERROR";
 
-    case NPP_INTERPOLATION_ERROR:
-      return "NPP_INTERPOLATION_ERROR";
+  case NPP_INTERPOLATION_ERROR:
+    return "NPP_INTERPOLATION_ERROR";
 
-    case NPP_RESIZE_FACTOR_ERROR:
-      return "NPP_RESIZE_FACTOR_ERROR";
+  case NPP_RESIZE_FACTOR_ERROR:
+    return "NPP_RESIZE_FACTOR_ERROR";
 
-    case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
-      return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
+  case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
+    return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
 
 #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
 
-    case NPP_MEMFREE_ERR:
-      return "NPP_MEMFREE_ERR";
+  case NPP_MEMFREE_ERR:
+    return "NPP_MEMFREE_ERR";
 
-    case NPP_MEMSET_ERR:
-      return "NPP_MEMSET_ERR";
+  case NPP_MEMSET_ERR:
+    return "NPP_MEMSET_ERR";
 
-    case NPP_MEMCPY_ERR:
-      return "NPP_MEMCPY_ERROR";
+  case NPP_MEMCPY_ERR:
+    return "NPP_MEMCPY_ERROR";
 
-    case NPP_MIRROR_FLIP_ERR:
-      return "NPP_MIRROR_FLIP_ERR";
+  case NPP_MIRROR_FLIP_ERR:
+    return "NPP_MIRROR_FLIP_ERR";
 #else
 
-    case NPP_MEMFREE_ERROR:
-      return "NPP_MEMFREE_ERROR";
+  case NPP_MEMFREE_ERROR:
+    return "NPP_MEMFREE_ERROR";
 
-    case NPP_MEMSET_ERROR:
-      return "NPP_MEMSET_ERROR";
+  case NPP_MEMSET_ERROR:
+    return "NPP_MEMSET_ERROR";
 
-    case NPP_MEMCPY_ERROR:
-      return "NPP_MEMCPY_ERROR";
+  case NPP_MEMCPY_ERROR:
+    return "NPP_MEMCPY_ERROR";
 
-    case NPP_MIRROR_FLIP_ERROR:
-      return "NPP_MIRROR_FLIP_ERROR";
+  case NPP_MIRROR_FLIP_ERROR:
+    return "NPP_MIRROR_FLIP_ERROR";
 #endif
 
-    case NPP_ALIGNMENT_ERROR:
-      return "NPP_ALIGNMENT_ERROR";
+  case NPP_ALIGNMENT_ERROR:
+    return "NPP_ALIGNMENT_ERROR";
 
-    case NPP_STEP_ERROR:
-      return "NPP_STEP_ERROR";
+  case NPP_STEP_ERROR:
+    return "NPP_STEP_ERROR";
 
-    case NPP_SIZE_ERROR:
-      return "NPP_SIZE_ERROR";
+  case NPP_SIZE_ERROR:
+    return "NPP_SIZE_ERROR";
 
-    case NPP_NULL_POINTER_ERROR:
-      return "NPP_NULL_POINTER_ERROR";
+  case NPP_NULL_POINTER_ERROR:
+    return "NPP_NULL_POINTER_ERROR";
 
-    case NPP_CUDA_KERNEL_EXECUTION_ERROR:
-      return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
+  case NPP_CUDA_KERNEL_EXECUTION_ERROR:
+    return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
 
-    case NPP_NOT_IMPLEMENTED_ERROR:
-      return "NPP_NOT_IMPLEMENTED_ERROR";
+  case NPP_NOT_IMPLEMENTED_ERROR:
+    return "NPP_NOT_IMPLEMENTED_ERROR";
 
-    case NPP_ERROR:
-      return "NPP_ERROR";
+  case NPP_ERROR:
+    return "NPP_ERROR";
 
-    case NPP_SUCCESS:
-      return "NPP_SUCCESS";
+  case NPP_SUCCESS:
+    return "NPP_SUCCESS";
 
-    case NPP_WRONG_INTERSECTION_QUAD_WARNING:
-      return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
+  case NPP_WRONG_INTERSECTION_QUAD_WARNING:
+    return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
 
-    case NPP_MISALIGNED_DST_ROI_WARNING:
-      return "NPP_MISALIGNED_DST_ROI_WARNING";
+  case NPP_MISALIGNED_DST_ROI_WARNING:
+    return "NPP_MISALIGNED_DST_ROI_WARNING";
 
-    case NPP_AFFINE_QUAD_INCORRECT_WARNING:
-      return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
+  case NPP_AFFINE_QUAD_INCORRECT_WARNING:
+    return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
 
-    case NPP_DOUBLE_SIZE_WARNING:
-      return "NPP_DOUBLE_SIZE_WARNING";
+  case NPP_DOUBLE_SIZE_WARNING:
+    return "NPP_DOUBLE_SIZE_WARNING";
 
-    case NPP_WRONG_INTERSECTION_ROI_WARNING:
-      return "NPP_WRONG_INTERSECTION_ROI_WARNING";
+  case NPP_WRONG_INTERSECTION_ROI_WARNING:
+    return "NPP_WRONG_INTERSECTION_ROI_WARNING";
 
 #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
-    /* These are 6.0 or higher */
-    case NPP_LUT_PALETTE_BITSIZE_ERROR:
-      return "NPP_LUT_PALETTE_BITSIZE_ERROR";
+  /* These are 6.0 or higher */
+  case NPP_LUT_PALETTE_BITSIZE_ERROR:
+    return "NPP_LUT_PALETTE_BITSIZE_ERROR";
 
-    case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
-      return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
+  case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
+    return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
 
-    case NPP_QUALITY_INDEX_ERROR:
-      return "NPP_QUALITY_INDEX_ERROR";
+  case NPP_QUALITY_INDEX_ERROR:
+    return "NPP_QUALITY_INDEX_ERROR";
 
-    case NPP_CHANNEL_ORDER_ERROR:
-      return "NPP_CHANNEL_ORDER_ERROR";
+  case NPP_CHANNEL_ORDER_ERROR:
+    return "NPP_CHANNEL_ORDER_ERROR";
 
-    case NPP_ZERO_MASK_VALUE_ERROR:
-      return "NPP_ZERO_MASK_VALUE_ERROR";
+  case NPP_ZERO_MASK_VALUE_ERROR:
+    return "NPP_ZERO_MASK_VALUE_ERROR";
 
-    case NPP_NUMBER_OF_CHANNELS_ERROR:
-      return "NPP_NUMBER_OF_CHANNELS_ERROR";
+  case NPP_NUMBER_OF_CHANNELS_ERROR:
+    return "NPP_NUMBER_OF_CHANNELS_ERROR";
 
-    case NPP_COI_ERROR:
-      return "NPP_COI_ERROR";
+  case NPP_COI_ERROR:
+    return "NPP_COI_ERROR";
 
-    case NPP_DIVISOR_ERROR:
-      return "NPP_DIVISOR_ERROR";
+  case NPP_DIVISOR_ERROR:
+    return "NPP_DIVISOR_ERROR";
 
-    case NPP_CHANNEL_ERROR:
-      return "NPP_CHANNEL_ERROR";
+  case NPP_CHANNEL_ERROR:
+    return "NPP_CHANNEL_ERROR";
 
-    case NPP_STRIDE_ERROR:
-      return "NPP_STRIDE_ERROR";
+  case NPP_STRIDE_ERROR:
+    return "NPP_STRIDE_ERROR";
 
-    case NPP_ANCHOR_ERROR:
-      return "NPP_ANCHOR_ERROR";
+  case NPP_ANCHOR_ERROR:
+    return "NPP_ANCHOR_ERROR";
 
-    case NPP_MASK_SIZE_ERROR:
-      return "NPP_MASK_SIZE_ERROR";
+  case NPP_MASK_SIZE_ERROR:
+    return "NPP_MASK_SIZE_ERROR";
 
-    case NPP_MOMENT_00_ZERO_ERROR:
-      return "NPP_MOMENT_00_ZERO_ERROR";
+  case NPP_MOMENT_00_ZERO_ERROR:
+    return "NPP_MOMENT_00_ZERO_ERROR";
 
-    case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
-      return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
+  case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
+    return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
 
-    case NPP_THRESHOLD_ERROR:
-      return "NPP_THRESHOLD_ERROR";
+  case NPP_THRESHOLD_ERROR:
+    return "NPP_THRESHOLD_ERROR";
 
-    case NPP_CONTEXT_MATCH_ERROR:
-      return "NPP_CONTEXT_MATCH_ERROR";
+  case NPP_CONTEXT_MATCH_ERROR:
+    return "NPP_CONTEXT_MATCH_ERROR";
 
-    case NPP_FFT_FLAG_ERROR:
-      return "NPP_FFT_FLAG_ERROR";
+  case NPP_FFT_FLAG_ERROR:
+    return "NPP_FFT_FLAG_ERROR";
 
-    case NPP_FFT_ORDER_ERROR:
-      return "NPP_FFT_ORDER_ERROR";
+  case NPP_FFT_ORDER_ERROR:
+    return "NPP_FFT_ORDER_ERROR";
 
-    case NPP_SCALE_RANGE_ERROR:
-      return "NPP_SCALE_RANGE_ERROR";
+  case NPP_SCALE_RANGE_ERROR:
+    return "NPP_SCALE_RANGE_ERROR";
 
-    case NPP_DATA_TYPE_ERROR:
-      return "NPP_DATA_TYPE_ERROR";
+  case NPP_DATA_TYPE_ERROR:
+    return "NPP_DATA_TYPE_ERROR";
 
-    case NPP_OUT_OFF_RANGE_ERROR:
-      return "NPP_OUT_OFF_RANGE_ERROR";
+  case NPP_OUT_OFF_RANGE_ERROR:
+    return "NPP_OUT_OFF_RANGE_ERROR";
 
-    case NPP_DIVIDE_BY_ZERO_ERROR:
-      return "NPP_DIVIDE_BY_ZERO_ERROR";
+  case NPP_DIVIDE_BY_ZERO_ERROR:
+    return "NPP_DIVIDE_BY_ZERO_ERROR";
 
-    case NPP_RANGE_ERROR:
-      return "NPP_RANGE_ERROR";
+  case NPP_RANGE_ERROR:
+    return "NPP_RANGE_ERROR";
 
-    case NPP_NO_MEMORY_ERROR:
-      return "NPP_NO_MEMORY_ERROR";
+  case NPP_NO_MEMORY_ERROR:
+    return "NPP_NO_MEMORY_ERROR";
 
-    case NPP_ERROR_RESERVED:
-      return "NPP_ERROR_RESERVED";
+  case NPP_ERROR_RESERVED:
+    return "NPP_ERROR_RESERVED";
 
-    case NPP_NO_OPERATION_WARNING:
-      return "NPP_NO_OPERATION_WARNING";
+  case NPP_NO_OPERATION_WARNING:
+    return "NPP_NO_OPERATION_WARNING";
 
-    case NPP_DIVIDE_BY_ZERO_WARNING:
-      return "NPP_DIVIDE_BY_ZERO_WARNING";
+  case NPP_DIVIDE_BY_ZERO_WARNING:
+    return "NPP_DIVIDE_BY_ZERO_WARNING";
 #endif
 
 #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
-    /* These are 7.0 or higher */
-    case NPP_OVERFLOW_ERROR:
-      return "NPP_OVERFLOW_ERROR";
+  /* These are 7.0 or higher */
+  case NPP_OVERFLOW_ERROR:
+    return "NPP_OVERFLOW_ERROR";
 
-    case NPP_CORRUPTED_DATA_ERROR:
-      return "NPP_CORRUPTED_DATA_ERROR";
+  case NPP_CORRUPTED_DATA_ERROR:
+    return "NPP_CORRUPTED_DATA_ERROR";
 #endif
   }
 
@@ -580,8 +580,8 @@ static const char *_cudaGetErrorEnum(NppStatus error) {
 #endif
 
 template <typename T>
-void check(T result, char const *const func, const char *const file,
-           int const line) {
+void check(T result, const char* const func, const char* const file,
+           const int line) {
   if (result) {
     fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line,
             static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
@@ -597,7 +597,7 @@ void check(T result, char const *const func, const char *const file,
 // This will output the proper error string when calling cudaGetLastError
 #define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)
 
-inline void __getLastCudaError(const char *errorMessage, const char *file,
+inline void __getLastCudaError(const char* errorMessage, const char* file,
                                const int line) {
   cudaError_t err = cudaGetLastError();
 
@@ -615,7 +615,7 @@ inline void __getLastCudaError(const char *errorMessage, const char *file,
 // but not exit program incase error detected.
 #define printLastCudaError(msg) __printLastCudaError(msg, __FILE__, __LINE__)
 
-inline void __printLastCudaError(const char *errorMessage, const char *file,
+inline void __printLastCudaError(const char* errorMessage, const char* file,
                                  const int line) {
   cudaError_t err = cudaGetLastError();
 
@@ -644,48 +644,31 @@ inline int _ConvertSMVer2Cores(int major, int minor) {
   // Defines for GPU Architecture types (using the SM version to determine
   // the # of cores per SM
   typedef struct {
-    int SM;  // 0xMm (hexidecimal notation), M = SM Major version,
+    int SM; // 0xMm (hexidecimal notation), M = SM Major version,
     // and m = SM minor version
     int Cores;
   } sSMtoCores;
 
   sSMtoCores nGpuArchCoresPerSM[] = {
-      {0x30, 192},
-      {0x32, 192},
-      {0x35, 192},
-      {0x37, 192},
-      {0x50, 128},
-      {0x52, 128},
-      {0x53, 128},
-      {0x60,  64},
-      {0x61, 128},
-      {0x62, 128},
-      {0x70,  64},
-      {0x72,  64},
-      {0x75,  64},
-      {0x80,  64},
-      {0x86, 128},
-      {0x87, 128},
-      {0x89, 128},
-      {0x90, 128},
-      {-1, -1}};
+      {0x30, 192}, {0x32, 192}, {0x35, 192}, {0x37, 192}, {0x50, 128},
+      {0x52, 128}, {0x53, 128}, {0x60, 64},  {0x61, 128}, {0x62, 128},
+      {0x70, 64},  {0x72, 64},  {0x75, 64},  {0x80, 64},  {0x86, 128},
+      {0x87, 128}, {0x89, 128}, {0x90, 128}, {-1, -1}};
 
   int index = 0;
 
   while (nGpuArchCoresPerSM[index].SM != -1) {
-    if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
+    if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
       return nGpuArchCoresPerSM[index].Cores;
-    }
 
     index++;
   }
 
   // If we don't find the values, we default use the previous one
   // to run properly
-  printf(
-      "MapSMtoCores for SM %d.%d is undefined."
-      "  Default to use %d Cores/SM\n",
-      major, minor, nGpuArchCoresPerSM[index - 1].Cores);
+  printf("MapSMtoCores for SM %d.%d is undefined."
+         "  Default to use %d Cores/SM\n",
+         major, minor, nGpuArchCoresPerSM[index - 1].Cores);
   return nGpuArchCoresPerSM[index - 1].Cores;
 }
 
@@ -693,51 +676,37 @@ inline const char* _ConvertSMVer2ArchName(int major, int minor) {
   // Defines for GPU Architecture types (using the SM version to determine
   // the GPU Arch name)
   typedef struct {
-    int SM;  // 0xMm (hexidecimal notation), M = SM Major version,
+    int SM; // 0xMm (hexidecimal notation), M = SM Major version,
     // and m = SM minor version
     const char* name;
   } sSMtoArchName;
 
   sSMtoArchName nGpuArchNameSM[] = {
-      {0x30, "Kepler"},
-      {0x32, "Kepler"},
-      {0x35, "Kepler"},
-      {0x37, "Kepler"},
-      {0x50, "Maxwell"},
-      {0x52, "Maxwell"},
-      {0x53, "Maxwell"},
-      {0x60, "Pascal"},
-      {0x61, "Pascal"},
-      {0x62, "Pascal"},
-      {0x70, "Volta"},
-      {0x72, "Xavier"},
-      {0x75, "Turing"},
-      {0x80, "Ampere"},
-      {0x86, "Ampere"},
-      {0x87, "Ampere"},
-      {0x89, "Ada"},
-      {0x90, "Hopper"},
+      {0x30, "Kepler"},       {0x32, "Kepler"},  {0x35, "Kepler"},
+      {0x37, "Kepler"},       {0x50, "Maxwell"}, {0x52, "Maxwell"},
+      {0x53, "Maxwell"},      {0x60, "Pascal"},  {0x61, "Pascal"},
+      {0x62, "Pascal"},       {0x70, "Volta"},   {0x72, "Xavier"},
+      {0x75, "Turing"},       {0x80, "Ampere"},  {0x86, "Ampere"},
+      {0x87, "Ampere"},       {0x89, "Ada"},     {0x90, "Hopper"},
       {-1, "Graphics Device"}};
 
   int index = 0;
 
   while (nGpuArchNameSM[index].SM != -1) {
-    if (nGpuArchNameSM[index].SM == ((major << 4) + minor)) {
+    if (nGpuArchNameSM[index].SM == ((major << 4) + minor))
       return nGpuArchNameSM[index].name;
-    }
 
     index++;
   }
 
   // If we don't find the values, we default use the previous one
   // to run properly
-  printf(
-      "MapSMtoArchName for SM %d.%d is undefined."
-      "  Default to use %s\n",
-      major, minor, nGpuArchNameSM[index - 1].name);
+  printf("MapSMtoArchName for SM %d.%d is undefined."
+         "  Default to use %s\n",
+         major, minor, nGpuArchNameSM[index - 1].name);
   return nGpuArchNameSM[index - 1].name;
 }
-  // end of GPU Architecture definitions
+// end of GPU Architecture definitions
 
 #ifdef __CUDA_RUNTIME_H__
 // General GPU Device CUDA Initialization
@@ -746,15 +715,13 @@ inline int gpuDeviceInit(int devID) {
   checkCudaErrors(cudaGetDeviceCount(&device_count));
 
   if (device_count == 0) {
-    fprintf(stderr,
-            "gpuDeviceInit() CUDA error: "
-            "no devices supporting CUDA.\n");
+    fprintf(stderr, "gpuDeviceInit() CUDA error: "
+                    "no devices supporting CUDA.\n");
     exit(EXIT_FAILURE);
   }
 
-  if (devID < 0) {
+  if (devID < 0)
     devID = 0;
-  }
 
   if (devID > device_count - 1) {
     fprintf(stderr, "\n");
@@ -769,13 +736,15 @@ inline int gpuDeviceInit(int devID) {
   }
 
   int computeMode = -1, major = 0, minor = 0;
-  checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, devID));
-  checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
-  checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
+  checkCudaErrors(
+      cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, devID));
+  checkCudaErrors(
+      cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
+  checkCudaErrors(
+      cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
   if (computeMode == cudaComputeModeProhibited) {
-    fprintf(stderr,
-            "Error: device is running in <Compute Mode "
-            "Prohibited>, no threads can use cudaSetDevice().\n");
+    fprintf(stderr, "Error: device is running in <Compute Mode "
+                    "Prohibited>, no threads can use cudaSetDevice().\n");
     return -1;
   }
 
@@ -785,7 +754,8 @@ inline int gpuDeviceInit(int devID) {
   }
 
   checkCudaErrors(cudaSetDevice(devID));
-  printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, _ConvertSMVer2ArchName(major, minor));
+  printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID,
+         _ConvertSMVer2ArchName(major, minor));
 
   return devID;
 }
@@ -801,9 +771,8 @@ inline int gpuGetMaxGflopsDeviceId() {
   checkCudaErrors(cudaGetDeviceCount(&device_count));
 
   if (device_count == 0) {
-    fprintf(stderr,
-            "gpuGetMaxGflopsDeviceId() CUDA error:"
-            " no devices supporting CUDA.\n");
+    fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error:"
+                    " no devices supporting CUDA.\n");
     exit(EXIT_FAILURE);
   }
 
@@ -812,35 +781,40 @@ inline int gpuGetMaxGflopsDeviceId() {
 
   while (current_device < device_count) {
     int computeMode = -1, major = 0, minor = 0;
-    checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
-    checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
-    checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
+    checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode,
+                                           current_device));
+    checkCudaErrors(cudaDeviceGetAttribute(
+        &major, cudaDevAttrComputeCapabilityMajor, current_device));
+    checkCudaErrors(cudaDeviceGetAttribute(
+        &minor, cudaDevAttrComputeCapabilityMinor, current_device));
 
     // If this GPU is not running on Compute Mode prohibited,
     // then we can add it to the list
     if (computeMode != cudaComputeModeProhibited) {
-      if (major == 9999 && minor == 9999) {
+      if (major == 9999 && minor == 9999)
         sm_per_multiproc = 1;
-      } else {
-        sm_per_multiproc =
-            _ConvertSMVer2Cores(major,  minor);
-      }
+      else
+        sm_per_multiproc = _ConvertSMVer2Cores(major, minor);
       int multiProcessorCount = 0, clockRate = 0;
-      checkCudaErrors(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, current_device));
-      cudaError_t result = cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, current_device);
+      checkCudaErrors(cudaDeviceGetAttribute(&multiProcessorCount,
+                                             cudaDevAttrMultiProcessorCount,
+                                             current_device));
+      cudaError_t result = cudaDeviceGetAttribute(
+          &clockRate, cudaDevAttrClockRate, current_device);
       if (result != cudaSuccess) {
         // If cudaDevAttrClockRate attribute is not supported we
         // set clockRate as 1, to consider GPU with most SMs and CUDA Cores.
-        if(result == cudaErrorInvalidValue) {
+        if (result == cudaErrorInvalidValue) {
           clockRate = 1;
-        }
-        else {
-          fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \n", __FILE__, __LINE__,
-            static_cast<unsigned int>(result), _cudaGetErrorEnum(result));
+        } else {
+          fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \n", __FILE__,
+                  __LINE__, static_cast<unsigned int>(result),
+                  _cudaGetErrorEnum(result));
           exit(EXIT_FAILURE);
         }
       }
-      uint64_t compute_perf = (uint64_t)multiProcessorCount * sm_per_multiproc * clockRate;
+      uint64_t compute_perf =
+          (uint64_t)multiProcessorCount * sm_per_multiproc * clockRate;
 
       if (compute_perf > max_compute_perf) {
         max_compute_perf = compute_perf;
@@ -854,9 +828,8 @@ inline int gpuGetMaxGflopsDeviceId() {
   }
 
   if (devices_prohibited == device_count) {
-    fprintf(stderr,
-            "gpuGetMaxGflopsDeviceId() CUDA error:"
-            " all devices have compute mode prohibited.\n");
+    fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error:"
+                    " all devices have compute mode prohibited.\n");
     exit(EXIT_FAILURE);
   }
 
@@ -864,7 +837,7 @@ inline int gpuGetMaxGflopsDeviceId() {
 }
 
 // Initialization code to find the best CUDA Device
-inline int findCudaDevice(int argc, const char **argv) {
+inline int findCudaDevice(int argc, const char** argv) {
   int devID = 0;
 
   // If the command-line has a device number specified, use it
@@ -887,11 +860,12 @@ inline int findCudaDevice(int argc, const char **argv) {
     devID = gpuGetMaxGflopsDeviceId();
     checkCudaErrors(cudaSetDevice(devID));
     int major = 0, minor = 0;
-    checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
-    checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
-    printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
-           devID, _ConvertSMVer2ArchName(major, minor), major, minor);
-
+    checkCudaErrors(cudaDeviceGetAttribute(
+        &major, cudaDevAttrComputeCapabilityMajor, devID));
+    checkCudaErrors(cudaDeviceGetAttribute(
+        &minor, cudaDevAttrComputeCapabilityMinor, devID));
+    printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID,
+           _ConvertSMVer2ArchName(major, minor), major, minor);
   }
 
   return devID;
@@ -912,18 +886,23 @@ inline int findIntegratedGPU() {
   // Find the integrated GPU which is compute capable
   while (current_device < device_count) {
     int computeMode = -1, integrated = -1;
-    checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
-    checkCudaErrors(cudaDeviceGetAttribute(&integrated, cudaDevAttrIntegrated, current_device));
+    checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode,
+                                           current_device));
+    checkCudaErrors(cudaDeviceGetAttribute(&integrated, cudaDevAttrIntegrated,
+                                           current_device));
     // If GPU is integrated and is not running on Compute Mode prohibited,
     // then cuda can map to GLES resource
     if (integrated && (computeMode != cudaComputeModeProhibited)) {
       checkCudaErrors(cudaSetDevice(current_device));
 
       int major = 0, minor = 0;
-      checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
-      checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
+      checkCudaErrors(cudaDeviceGetAttribute(
+          &major, cudaDevAttrComputeCapabilityMajor, current_device));
+      checkCudaErrors(cudaDeviceGetAttribute(
+          &minor, cudaDevAttrComputeCapabilityMinor, current_device));
       printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
-             current_device, _ConvertSMVer2ArchName(major, minor), major, minor);
+             current_device, _ConvertSMVer2ArchName(major, minor), major,
+             minor);
 
       return current_device;
     } else {
@@ -934,9 +913,8 @@ inline int findIntegratedGPU() {
   }
 
   if (devices_prohibited == device_count) {
-    fprintf(stderr,
-            "CUDA error:"
-            " No GLES-CUDA Interop capable GPU found.\n");
+    fprintf(stderr, "CUDA error:"
+                    " No GLES-CUDA Interop capable GPU found.\n");
     exit(EXIT_FAILURE);
   }
 
@@ -949,25 +927,25 @@ inline bool checkCudaCapabilities(int major_version, int minor_version) {
   int major = 0, minor = 0;
 
   checkCudaErrors(cudaGetDevice(&dev));
-  checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, dev));
-  checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, dev));
+  checkCudaErrors(
+      cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, dev));
+  checkCudaErrors(
+      cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, dev));
 
   if ((major > major_version) ||
-      (major == major_version &&
-       minor >= minor_version)) {
+      (major == major_version && minor >= minor_version)) {
     printf("  Device %d: <%16s >, Compute SM %d.%d detected\n", dev,
            _ConvertSMVer2ArchName(major, minor), major, minor);
     return true;
   } else {
-    printf(
-        "  No GPU device was found that can support "
-        "CUDA compute capability %d.%d.\n",
-        major_version, minor_version);
+    printf("  No GPU device was found that can support "
+           "CUDA compute capability %d.%d.\n",
+           major_version, minor_version);
     return false;
   }
 }
 #endif
 
-  // end of CUDA Helper Functions
+// end of CUDA Helper Functions
 
-#endif  // COMMON_HELPER_CUDA_H_
+#endif // COMMON_HELPER_CUDA_H_
diff --git a/demos/CUDA/BlackScholes/helper/helper_functions.h b/demos/CUDA/BlackScholes/helper/helper_functions.h
index 14dc77fc7..bd40ba43e 100644
--- a/demos/CUDA/BlackScholes/helper/helper_functions.h
+++ b/demos/CUDA/BlackScholes/helper/helper_functions.h
@@ -48,12 +48,12 @@
 #include <vector>
 
 // includes, timer, string parsing, image helpers
-#include <helper_image.h>  // helper functions for image compare, dump, data comparisons
-#include <helper_string.h>  // helper functions for string parsing
-#include <helper_timer.h>   // helper functions for timers
+#include <helper_image.h> // helper functions for image compare, dump, data comparisons
+#include <helper_string.h> // helper functions for string parsing
+#include <helper_timer.h>  // helper functions for timers
 
 #ifndef EXIT_WAIVED
 #define EXIT_WAIVED 2
 #endif
 
-#endif  // COMMON_HELPER_FUNCTIONS_H_
+#endif // COMMON_HELPER_FUNCTIONS_H_
diff --git a/demos/CUDA/BlackScholes/helper/helper_image.h b/demos/CUDA/BlackScholes/helper/helper_image.h
index 24b10462d..33fbf1b62 100644
--- a/demos/CUDA/BlackScholes/helper/helper_image.h
+++ b/demos/CUDA/BlackScholes/helper/helper_image.h
@@ -61,55 +61,49 @@ const unsigned int PGMHeaderSize = 0x40;
 // types
 
 //! Data converter from unsigned char / unsigned byte to type T
-template <class T>
-struct ConverterFromUByte;
+template <class T> struct ConverterFromUByte;
 
 //! Data converter from unsigned char / unsigned byte
-template <>
-struct ConverterFromUByte<unsigned char> {
+template <> struct ConverterFromUByte<unsigned char> {
   //! Conversion operator
   //! @return converted value
   //! @param  val  value to convert
-  float operator()(const unsigned char &val) {
+  float operator()(const unsigned char& val) {
     return static_cast<unsigned char>(val);
   }
 };
 
 //! Data converter from unsigned char / unsigned byte to float
-template <>
-struct ConverterFromUByte<float> {
+template <> struct ConverterFromUByte<float> {
   //! Conversion operator
   //! @return converted value
   //! @param  val  value to convert
-  float operator()(const unsigned char &val) {
+  float operator()(const unsigned char& val) {
     return static_cast<float>(val) / 255.0f;
   }
 };
 
 //! Data converter from unsigned char / unsigned byte to type T
-template <class T>
-struct ConverterToUByte;
+template <class T> struct ConverterToUByte;
 
 //! Data converter from unsigned char / unsigned byte to unsigned int
-template <>
-struct ConverterToUByte<unsigned char> {
+template <> struct ConverterToUByte<unsigned char> {
   //! Conversion operator (essentially a passthru
   //! @return converted value
   //! @param  val  value to convert
-  unsigned char operator()(const unsigned char &val) { return val; }
+  unsigned char operator()(const unsigned char& val) { return val; }
 };
 
 //! Data converter from unsigned char / unsigned byte to unsigned int
-template <>
-struct ConverterToUByte<float> {
+template <> struct ConverterToUByte<float> {
   //! Conversion operator
   //! @return converted value
   //! @param  val  value to convert
-  unsigned char operator()(const float &val) {
+  unsigned char operator()(const float& val) {
     return static_cast<unsigned char>(val * 255.0f);
   }
 };
-}  // namespace helper_image_internal
+} // namespace helper_image_internal
 
 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
 #ifndef FOPEN
@@ -133,9 +127,9 @@ struct ConverterToUByte<float> {
 #endif
 #endif
 
-inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w,
-                      unsigned int *h, unsigned int *channels) {
-  FILE *fp = NULL;
+inline bool __loadPPM(const char* file, unsigned char** data, unsigned int* w,
+                      unsigned int* h, unsigned int* channels) {
+  FILE* fp = NULL;
 
   if (FOPEN_FAIL(FOPEN(fp, file, "rb"))) {
     std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
@@ -173,36 +167,31 @@ inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w,
       return false;
     }
 
-    if (header[0] == '#') {
+    if (header[0] == '#')
       continue;
-    }
 
-    if (i == 0) {
+    if (i == 0)
       i += SSCANF(header, "%u %u %u", &width, &height, &maxval);
-    } else if (i == 1) {
+    else if (i == 1)
       i += SSCANF(header, "%u %u", &height, &maxval);
-    } else if (i == 2) {
+    else if (i == 2)
       i += SSCANF(header, "%u", &maxval);
-    }
   }
 
   // check if given handle for the data is initialized
   if (NULL != *data) {
-    if (*w != width || *h != height) {
+    if (*w != width || *h != height)
       std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
-    }
   } else {
-    *data = (unsigned char *)malloc(sizeof(unsigned char) * width * height *
-                                    *channels);
+    *data = (unsigned char*)malloc(sizeof(unsigned char) * width * height *
+                                   *channels);
     *w = width;
     *h = height;
   }
 
   // read and close file
-  if (fread(*data, sizeof(unsigned char), width * height * *channels, fp) ==
-      0) {
+  if (fread(*data, sizeof(unsigned char), width * height * *channels, fp) == 0)
     std::cerr << "__LoadPPM() read data returned error." << std::endl;
-  }
 
   fclose(fp);
 
@@ -210,22 +199,20 @@ inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w,
 }
 
 template <class T>
-inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w,
-                       unsigned int *h) {
-  unsigned char *idata = NULL;
+inline bool sdkLoadPGM(const char* file, T** data, unsigned int* w,
+                       unsigned int* h) {
+  unsigned char* idata = NULL;
   unsigned int channels;
 
-  if (true != __loadPPM(file, &idata, w, h, &channels)) {
+  if (true != __loadPPM(file, &idata, w, h, &channels))
     return false;
-  }
 
   unsigned int size = *w * *h * channels;
 
   // initialize mem if necessary
   // the correct size is checked / set in loadPGMc()
-  if (NULL == *data) {
-    *data = reinterpret_cast<T *>(malloc(sizeof(T) * size));
-  }
+  if (NULL == *data)
+    *data = reinterpret_cast<T*>(malloc(sizeof(T) * size));
 
   // copy and cast data
   std::transform(idata, idata + size, *data,
@@ -237,18 +224,18 @@ inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w,
 }
 
 template <class T>
-inline bool sdkLoadPPM4(const char *file, T **data, unsigned int *w,
-                        unsigned int *h) {
-  unsigned char *idata = 0;
+inline bool sdkLoadPPM4(const char* file, T** data, unsigned int* w,
+                        unsigned int* h) {
+  unsigned char* idata = 0;
   unsigned int channels;
 
   if (__loadPPM(file, &idata, w, h, &channels)) {
     // pad 4th component
     int size = *w * *h;
     // keep the original pointer
-    unsigned char *idata_orig = idata;
-    *data = reinterpret_cast<T *>(malloc(sizeof(T) * size * 4));
-    unsigned char *ptr = *data;
+    unsigned char* idata_orig = idata;
+    *data = reinterpret_cast<T*>(malloc(sizeof(T) * size * 4));
+    unsigned char* ptr = *data;
 
     for (int i = 0; i < size; i++) {
       *ptr++ = *idata++;
@@ -265,7 +252,7 @@ inline bool sdkLoadPPM4(const char *file, T **data, unsigned int *w,
   }
 }
 
-inline bool __savePPM(const char *file, unsigned char *data, unsigned int w,
+inline bool __savePPM(const char* file, unsigned char* data, unsigned int w,
                       unsigned int h, unsigned int channels) {
   assert(NULL != data);
   assert(w > 0);
@@ -289,9 +276,8 @@ inline bool __savePPM(const char *file, unsigned char *data, unsigned int w,
 
   fh << w << "\n" << h << "\n" << 0xff << std::endl;
 
-  for (unsigned int i = 0; (i < (w * h * channels)) && fh.good(); ++i) {
+  for (unsigned int i = 0; (i < (w * h * channels)) && fh.good(); ++i)
     fh << data[i];
-  }
 
   fh.flush();
 
@@ -306,10 +292,10 @@ inline bool __savePPM(const char *file, unsigned char *data, unsigned int w,
 }
 
 template <class T>
-inline bool sdkSavePGM(const char *file, T *data, unsigned int w,
+inline bool sdkSavePGM(const char* file, T* data, unsigned int w,
                        unsigned int h) {
   unsigned int size = w * h;
-  unsigned char *idata = (unsigned char *)malloc(sizeof(unsigned char) * size);
+  unsigned char* idata = (unsigned char*)malloc(sizeof(unsigned char) * size);
 
   std::transform(data, data + size, idata,
                  helper_image_internal::ConverterToUByte<T>());
@@ -323,13 +309,13 @@ inline bool sdkSavePGM(const char *file, T *data, unsigned int w,
   return result;
 }
 
-inline bool sdkSavePPM4ub(const char *file, unsigned char *data, unsigned int w,
+inline bool sdkSavePPM4ub(const char* file, unsigned char* data, unsigned int w,
                           unsigned int h) {
   // strip 4th component
   int size = w * h;
-  unsigned char *ndata =
-      (unsigned char *)malloc(sizeof(unsigned char) * size * 3);
-  unsigned char *ptr = ndata;
+  unsigned char* ndata =
+      (unsigned char*)malloc(sizeof(unsigned char) * size * 3);
+  unsigned char* ptr = ndata;
 
   for (int i = 0; i < size; i++) {
     *ptr++ = *data++;
@@ -352,7 +338,7 @@ inline bool sdkSavePPM4ub(const char *file, unsigned char *data, unsigned int w,
 //! @param len  number of data elements in data, -1 on error
 //////////////////////////////////////////////////////////////////////////////
 template <class T>
-inline bool sdkReadFile(const char *filename, T **data, unsigned int *len,
+inline bool sdkReadFile(const char* filename, T** data, unsigned int* len,
                         bool verbose) {
   // check input arguments
   assert(NULL != filename);
@@ -362,7 +348,7 @@ inline bool sdkReadFile(const char *filename, T **data, unsigned int *len,
   std::vector<T> data_read;
 
   // open file for reading
-  FILE *fh = NULL;
+  FILE* fh = NULL;
 
   // check if filestream is valid
   if (FOPEN_FAIL(FOPEN(fh, filename, "r"))) {
@@ -394,7 +380,7 @@ inline bool sdkReadFile(const char *filename, T **data, unsigned int *len,
     }
   } else {
     // allocate storage for the data read
-    *data = reinterpret_cast<T *>(malloc(sizeof(T) * data_read.size()));
+    *data = reinterpret_cast<T*>(malloc(sizeof(T) * data_read.size()));
     // store signal size
     *len = static_cast<unsigned int>(data_read.size());
   }
@@ -414,7 +400,7 @@ inline bool sdkReadFile(const char *filename, T **data, unsigned int *len,
 //! @param len  number of data elements in data, -1 on error
 //////////////////////////////////////////////////////////////////////////////
 template <class T>
-inline bool sdkReadFileBlocks(const char *filename, T **data, unsigned int *len,
+inline bool sdkReadFileBlocks(const char* filename, T** data, unsigned int* len,
                               unsigned int block_num, unsigned int block_size,
                               bool verbose) {
   // check input arguments
@@ -422,7 +408,7 @@ inline bool sdkReadFileBlocks(const char *filename, T **data, unsigned int *len,
   assert(NULL != len);
 
   // open file for reading
-  FILE *fh = fopen(filename, "rb");
+  FILE* fh = fopen(filename, "rb");
 
   if (fh == NULL && verbose) {
     std::cerr << "sdkReadFile() : Opening file failed." << std::endl;
@@ -431,7 +417,7 @@ inline bool sdkReadFileBlocks(const char *filename, T **data, unsigned int *len,
 
   // check if the given handle is already initialized
   // allocate storage for the data read
-  data[block_num] = reinterpret_cast<T *>(malloc(block_size));
+  data[block_num] = reinterpret_cast<T*>(malloc(block_size));
 
   // read all data elements
   fseek(fh, block_num * block_size, SEEK_SET);
@@ -451,7 +437,7 @@ inline bool sdkReadFileBlocks(const char *filename, T **data, unsigned int *len,
 //! @param epsilon  epsilon for comparison
 //////////////////////////////////////////////////////////////////////////////
 template <class T, class S>
-inline bool sdkWriteFile(const char *filename, const T *data, unsigned int len,
+inline bool sdkWriteFile(const char* filename, const T* data, unsigned int len,
                          const S epsilon, bool verbose, bool append = false) {
   assert(NULL != filename);
   assert(NULL != data);
@@ -476,9 +462,8 @@ inline bool sdkWriteFile(const char *filename, const T *data, unsigned int len,
 
   // check if filestream is valid
   if (!fh.good()) {
-    if (verbose) {
+    if (verbose)
       std::cerr << "sdkWriteFile() : Opening file failed." << std::endl;
-    }
 
     return false;
   }
@@ -487,15 +472,13 @@ inline bool sdkWriteFile(const char *filename, const T *data, unsigned int len,
   fh << "# " << epsilon << "\n";
 
   // write data
-  for (unsigned int i = 0; (i < len) && (fh.good()); ++i) {
+  for (unsigned int i = 0; (i < len) && (fh.good()); ++i)
     fh << data[i] << ' ';
-  }
 
   // Check if writing succeeded
   if (!fh.good()) {
-    if (verbose) {
+    if (verbose)
       std::cerr << "sdkWriteFile() : Writing file failed." << std::endl;
-    }
 
     return false;
   }
@@ -515,7 +498,7 @@ inline bool sdkWriteFile(const char *filename, const T *data, unsigned int len,
 //! @param epsilon    epsilon to use for the comparison
 //////////////////////////////////////////////////////////////////////////////
 template <class T, class S>
-inline bool compareData(const T *reference, const T *data,
+inline bool compareData(const T* reference, const T* data,
                         const unsigned int len, const S epsilon,
                         const float threshold) {
   assert(epsilon >= 0);
@@ -569,7 +552,7 @@ inline bool compareData(const T *reference, const T *data,
 //! @param epsilon    threshold % of (# of bytes) for pass/fail
 //////////////////////////////////////////////////////////////////////////////
 template <class T, class S>
-inline bool compareDataAsFloatThreshold(const T *reference, const T *data,
+inline bool compareDataAsFloatThreshold(const T* reference, const T* data,
                                         const unsigned int len, const S epsilon,
                                         const float threshold) {
   assert(epsilon >= 0);
@@ -585,15 +568,13 @@ inline bool compareDataAsFloatThreshold(const T *reference, const T *data,
     bool comp = (diff < max_error);
     result &= comp;
 
-    if (!comp) {
+    if (!comp)
       error_count++;
-    }
   }
 
   if (threshold == 0.0f) {
-    if (error_count) {
+    if (error_count)
       printf("total # of errors = %d\n", error_count);
-    }
 
     return (error_count == 0) ? true : false;
   } else {
@@ -607,18 +588,18 @@ inline bool compareDataAsFloatThreshold(const T *reference, const T *data,
   }
 }
 
-inline void sdkDumpBin(void *data, unsigned int bytes, const char *filename) {
+inline void sdkDumpBin(void* data, unsigned int bytes, const char* filename) {
   printf("sdkDumpBin: <%s>\n", filename);
-  FILE *fp;
+  FILE* fp;
   FOPEN(fp, filename, "wb");
   fwrite(data, bytes, 1, fp);
   fflush(fp);
   fclose(fp);
 }
 
-inline bool sdkCompareBin2BinUint(const char *src_file, const char *ref_file,
+inline bool sdkCompareBin2BinUint(const char* src_file, const char* ref_file,
                                   unsigned int nelements, const float epsilon,
-                                  const float threshold, char *exec_path) {
+                                  const float threshold, char* exec_path) {
   unsigned int *src_buffer, *ref_buffer;
   FILE *src_fp = NULL, *ref_fp = NULL;
 
@@ -631,7 +612,7 @@ inline bool sdkCompareBin2BinUint(const char *src_file, const char *ref_file,
     error_count++;
   }
 
-  char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
+  char* ref_file_path = sdkFindFilePath(ref_file, exec_path);
 
   if (ref_file_path == NULL) {
     printf("compareBin2Bin <unsigned int>  unable to find <%s> in <%s>\n",
@@ -642,42 +623,37 @@ inline bool sdkCompareBin2BinUint(const char *src_file, const char *ref_file,
     printf("  FAILED\n");
     error_count++;
 
-    if (src_fp) {
+    if (src_fp)
       fclose(src_fp);
-    }
 
-    if (ref_fp) {
+    if (ref_fp)
       fclose(ref_fp);
-    }
   } else {
     if (FOPEN_FAIL(FOPEN(ref_fp, ref_file_path, "rb"))) {
-      printf(
-          "compareBin2Bin <unsigned int>"
-          " unable to open ref_file: %s\n",
-          ref_file_path);
+      printf("compareBin2Bin <unsigned int>"
+             " unable to open ref_file: %s\n",
+             ref_file_path);
       error_count++;
     }
 
     if (src_fp && ref_fp) {
-      src_buffer = (unsigned int *)malloc(nelements * sizeof(unsigned int));
-      ref_buffer = (unsigned int *)malloc(nelements * sizeof(unsigned int));
+      src_buffer = (unsigned int*)malloc(nelements * sizeof(unsigned int));
+      ref_buffer = (unsigned int*)malloc(nelements * sizeof(unsigned int));
 
       fsize = fread(src_buffer, nelements, sizeof(unsigned int), src_fp);
       fsize = fread(ref_buffer, nelements, sizeof(unsigned int), ref_fp);
 
-      printf(
-          "> compareBin2Bin <unsigned int> nelements=%d,"
-          " epsilon=%4.2f, threshold=%4.2f\n",
-          nelements, epsilon, threshold);
+      printf("> compareBin2Bin <unsigned int> nelements=%d,"
+             " epsilon=%4.2f, threshold=%4.2f\n",
+             nelements, epsilon, threshold);
       printf("   src_file <%s>, size=%d bytes\n", src_file,
              static_cast<int>(fsize));
       printf("   ref_file <%s>, size=%d bytes\n", ref_file_path,
              static_cast<int>(fsize));
 
       if (!compareData<unsigned int, float>(ref_buffer, src_buffer, nelements,
-                                            epsilon, threshold)) {
+                                            epsilon, threshold))
         error_count++;
-      }
 
       fclose(src_fp);
       fclose(ref_fp);
@@ -685,28 +661,25 @@ inline bool sdkCompareBin2BinUint(const char *src_file, const char *ref_file,
       free(src_buffer);
       free(ref_buffer);
     } else {
-      if (src_fp) {
+      if (src_fp)
         fclose(src_fp);
-      }
 
-      if (ref_fp) {
+      if (ref_fp)
         fclose(ref_fp);
-      }
     }
   }
 
-  if (error_count == 0) {
+  if (error_count == 0)
     printf("  OK\n");
-  } else {
+  else
     printf("  FAILURE: %d errors...\n", (unsigned int)error_count);
-  }
 
-  return (error_count == 0);  // returns true if all pixels pass
+  return (error_count == 0); // returns true if all pixels pass
 }
 
-inline bool sdkCompareBin2BinFloat(const char *src_file, const char *ref_file,
+inline bool sdkCompareBin2BinFloat(const char* src_file, const char* ref_file,
                                    unsigned int nelements, const float epsilon,
-                                   const float threshold, char *exec_path) {
+                                   const float threshold, char* exec_path) {
   float *src_buffer = NULL, *ref_buffer = NULL;
   FILE *src_fp = NULL, *ref_fp = NULL;
   size_t fsize = 0;
@@ -718,7 +691,7 @@ inline bool sdkCompareBin2BinFloat(const char *src_file, const char *ref_file,
     error_count = 1;
   }
 
-  char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
+  char* ref_file_path = sdkFindFilePath(ref_file, exec_path);
 
   if (ref_file_path == NULL) {
     printf("compareBin2Bin <float> unable to find <%s> in <%s>\n", ref_file,
@@ -729,13 +702,11 @@ inline bool sdkCompareBin2BinFloat(const char *src_file, const char *ref_file,
     printf("  FAILED\n");
     error_count++;
 
-    if (src_fp) {
+    if (src_fp)
       fclose(src_fp);
-    }
 
-    if (ref_fp) {
+    if (ref_fp)
       fclose(ref_fp);
-    }
   } else {
     if (FOPEN_FAIL(FOPEN(ref_fp, ref_file_path, "rb"))) {
       printf("compareBin2Bin <float> unable to open ref_file: %s\n",
@@ -744,13 +715,12 @@ inline bool sdkCompareBin2BinFloat(const char *src_file, const char *ref_file,
     }
 
     if (src_fp && ref_fp) {
-      src_buffer = reinterpret_cast<float *>(malloc(nelements * sizeof(float)));
-      ref_buffer = reinterpret_cast<float *>(malloc(nelements * sizeof(float)));
+      src_buffer = reinterpret_cast<float*>(malloc(nelements * sizeof(float)));
+      ref_buffer = reinterpret_cast<float*>(malloc(nelements * sizeof(float)));
 
-      printf(
-          "> compareBin2Bin <float> nelements=%d, epsilon=%4.2f,"
-          " threshold=%4.2f\n",
-          nelements, epsilon, threshold);
+      printf("> compareBin2Bin <float> nelements=%d, epsilon=%4.2f,"
+             " threshold=%4.2f\n",
+             nelements, epsilon, threshold);
       fsize = fread(src_buffer, sizeof(float), nelements, src_fp);
       printf("   src_file <%s>, size=%d bytes\n", src_file,
              static_cast<int>(fsize * sizeof(float)));
@@ -759,9 +729,8 @@ inline bool sdkCompareBin2BinFloat(const char *src_file, const char *ref_file,
              static_cast<int>(fsize * sizeof(float)));
 
       if (!compareDataAsFloatThreshold<float, float>(
-              ref_buffer, src_buffer, nelements, epsilon, threshold)) {
+              ref_buffer, src_buffer, nelements, epsilon, threshold))
         error_count++;
-      }
 
       fclose(src_fp);
       fclose(ref_fp);
@@ -769,26 +738,23 @@ inline bool sdkCompareBin2BinFloat(const char *src_file, const char *ref_file,
       free(src_buffer);
       free(ref_buffer);
     } else {
-      if (src_fp) {
+      if (src_fp)
         fclose(src_fp);
-      }
 
-      if (ref_fp) {
+      if (ref_fp)
         fclose(ref_fp);
-      }
     }
   }
 
-  if (error_count == 0) {
+  if (error_count == 0)
     printf("  OK\n");
-  } else {
+  else
     printf("  FAILURE: %d errors...\n", (unsigned int)error_count);
-  }
 
-  return (error_count == 0);  // returns true if all pixels pass
+  return (error_count == 0); // returns true if all pixels pass
 }
 
-inline bool sdkCompareL2fe(const float *reference, const float *data,
+inline bool sdkCompareL2fe(const float* reference, const float* data,
                            const unsigned int len, const float epsilon) {
   assert(epsilon >= 0);
 
@@ -825,24 +791,24 @@ inline bool sdkCompareL2fe(const float *reference, const float *data,
   return result;
 }
 
-inline bool sdkLoadPPMub(const char *file, unsigned char **data,
-                         unsigned int *w, unsigned int *h) {
+inline bool sdkLoadPPMub(const char* file, unsigned char** data,
+                         unsigned int* w, unsigned int* h) {
   unsigned int channels;
   return __loadPPM(file, data, w, h, &channels);
 }
 
-inline bool sdkLoadPPM4ub(const char *file, unsigned char **data,
-                          unsigned int *w, unsigned int *h) {
-  unsigned char *idata = 0;
+inline bool sdkLoadPPM4ub(const char* file, unsigned char** data,
+                          unsigned int* w, unsigned int* h) {
+  unsigned char* idata = 0;
   unsigned int channels;
 
   if (__loadPPM(file, &idata, w, h, &channels)) {
     // pad 4th component
     int size = *w * *h;
     // keep the original pointer
-    unsigned char *idata_orig = idata;
-    *data = (unsigned char *)malloc(sizeof(unsigned char) * size * 4);
-    unsigned char *ptr = *data;
+    unsigned char* idata_orig = idata;
+    *data = (unsigned char*)malloc(sizeof(unsigned char) * size * 4);
+    unsigned char* ptr = *data;
 
     for (int i = 0; i < size; i++) {
       *ptr++ = *idata++;
@@ -859,7 +825,7 @@ inline bool sdkLoadPPM4ub(const char *file, unsigned char **data,
   }
 }
 
-inline bool sdkComparePPM(const char *src_file, const char *ref_file,
+inline bool sdkComparePPM(const char* src_file, const char* ref_file,
                           const float epsilon, const float threshold,
                           bool verboseErrors) {
   unsigned char *src_data, *ref_data;
@@ -911,25 +877,22 @@ inline bool sdkComparePPM(const char *src_file, const char *ref_file,
   }
 
   if (compareData(ref_data, src_data, src_width * src_height * 4, epsilon,
-                  threshold) == false) {
+                  threshold) == false)
     error_count = 1;
-  }
 
   if (error_count == 0) {
-    if (verboseErrors) {
+    if (verboseErrors)
       std::cerr << "    OK\n\n";
-    }
   } else {
-    if (verboseErrors) {
+    if (verboseErrors)
       std::cerr << "    FAILURE!  " << error_count << " errors...\n\n";
-    }
   }
 
   // returns true if all pixels pass
   return (error_count == 0) ? true : false;
 }
 
-inline bool sdkComparePGM(const char *src_file, const char *ref_file,
+inline bool sdkComparePGM(const char* src_file, const char* ref_file,
                           const float epsilon, const float threshold,
                           bool verboseErrors) {
   unsigned char *src_data = 0, *ref_data = 0;
@@ -980,22 +943,19 @@ inline bool sdkComparePGM(const char *src_file, const char *ref_file,
               << threshold * 100 << "%)\n";
 
   if (compareData(ref_data, src_data, src_width * src_height, epsilon,
-                  threshold) == false) {
+                  threshold) == false)
     error_count = 1;
-  }
 
   if (error_count == 0) {
-    if (verboseErrors) {
+    if (verboseErrors)
       std::cerr << "    OK\n\n";
-    }
   } else {
-    if (verboseErrors) {
+    if (verboseErrors)
       std::cerr << "    FAILURE!  " << error_count << " errors...\n\n";
-    }
   }
 
   // returns true if all pixels pass
   return (error_count == 0) ? true : false;
 }
 
-#endif  // COMMON_HELPER_IMAGE_H_
+#endif // COMMON_HELPER_IMAGE_H_
diff --git a/demos/CUDA/BlackScholes/helper/helper_string.h b/demos/CUDA/BlackScholes/helper/helper_string.h
index 39a1b3805..f6c25f659 100644
--- a/demos/CUDA/BlackScholes/helper/helper_string.h
+++ b/demos/CUDA/BlackScholes/helper/helper_string.h
@@ -29,9 +29,9 @@
 #ifndef COMMON_HELPER_STRING_H_
 #define COMMON_HELPER_STRING_H_
 
+#include <fstream>
 #include <stdio.h>
 #include <stdlib.h>
-#include <fstream>
 #include <string>
 
 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
@@ -60,7 +60,7 @@
 #ifndef SPRINTF
 #define SPRINTF sprintf_s
 #endif
-#else  // Linux Includes
+#else // Linux Includes
 #include <string.h>
 #include <strings.h>
 
@@ -93,28 +93,27 @@
 #endif
 
 // CUDA Utility Helper Functions
-inline int stringRemoveDelimiter(char delimiter, const char *string) {
+inline int stringRemoveDelimiter(char delimiter, const char* string) {
   int string_start = 0;
 
-  while (string[string_start] == delimiter) {
+  while (string[string_start] == delimiter)
     string_start++;
-  }
 
-  if (string_start >= static_cast<int>(strlen(string) - 1)) {
+  if (string_start >= static_cast<int>(strlen(string) - 1))
     return 0;
-  }
 
   return string_start;
 }
 
-inline int getFileExtension(char *filename, char **extension) {
+inline int getFileExtension(char* filename, char** extension) {
   int string_length = static_cast<int>(strlen(filename));
 
-  while (filename[string_length--] != '.') {
-    if (string_length == 0) break;
-  }
+  while (filename[string_length--] != '.')
+    if (string_length == 0)
+      break;
 
-  if (string_length > 0) string_length += 2;
+  if (string_length > 0)
+    string_length += 2;
 
   if (string_length == 0)
     *extension = NULL;
@@ -124,16 +123,16 @@ inline int getFileExtension(char *filename, char **extension) {
   return string_length;
 }
 
-inline bool checkCmdLineFlag(const int argc, const char **argv,
-                             const char *string_ref) {
+inline bool checkCmdLineFlag(const int argc, const char** argv,
+                             const char* string_ref) {
   bool bFound = false;
 
   if (argc >= 1) {
     for (int i = 1; i < argc; i++) {
       int string_start = stringRemoveDelimiter('-', argv[i]);
-      const char *string_argv = &argv[i][string_start];
+      const char* string_argv = &argv[i][string_start];
 
-      const char *equal_pos = strchr(string_argv, '=');
+      const char* equal_pos = strchr(string_argv, '=');
       int argv_length = static_cast<int>(
           equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
 
@@ -152,14 +151,14 @@ inline bool checkCmdLineFlag(const int argc, const char **argv,
 
 // This function wraps the CUDA Driver API into a template function
 template <class T>
-inline bool getCmdLineArgumentValue(const int argc, const char **argv,
-                                    const char *string_ref, T *value) {
+inline bool getCmdLineArgumentValue(const int argc, const char** argv,
+                                    const char* string_ref, T* value) {
   bool bFound = false;
 
   if (argc >= 1) {
     for (int i = 1; i < argc; i++) {
       int string_start = stringRemoveDelimiter('-', argv[i]);
-      const char *string_argv = &argv[i][string_start];
+      const char* string_argv = &argv[i][string_start];
       int length = static_cast<int>(strlen(string_ref));
 
       if (!STRNCASECMP(string_argv, string_ref, length)) {
@@ -177,15 +176,15 @@ inline bool getCmdLineArgumentValue(const int argc, const char **argv,
   return bFound;
 }
 
-inline int getCmdLineArgumentInt(const int argc, const char **argv,
-                                 const char *string_ref) {
+inline int getCmdLineArgumentInt(const int argc, const char** argv,
+                                 const char* string_ref) {
   bool bFound = false;
   int value = -1;
 
   if (argc >= 1) {
     for (int i = 1; i < argc; i++) {
       int string_start = stringRemoveDelimiter('-', argv[i]);
-      const char *string_argv = &argv[i][string_start];
+      const char* string_argv = &argv[i][string_start];
       int length = static_cast<int>(strlen(string_ref));
 
       if (!STRNCASECMP(string_argv, string_ref, length)) {
@@ -202,22 +201,21 @@ inline int getCmdLineArgumentInt(const int argc, const char **argv,
     }
   }
 
-  if (bFound) {
+  if (bFound)
     return value;
-  } else {
+  else
     return 0;
-  }
 }
 
-inline float getCmdLineArgumentFloat(const int argc, const char **argv,
-                                     const char *string_ref) {
+inline float getCmdLineArgumentFloat(const int argc, const char** argv,
+                                     const char* string_ref) {
   bool bFound = false;
   float value = -1;
 
   if (argc >= 1) {
     for (int i = 1; i < argc; i++) {
       int string_start = stringRemoveDelimiter('-', argv[i]);
-      const char *string_argv = &argv[i][string_start];
+      const char* string_argv = &argv[i][string_start];
       int length = static_cast<int>(strlen(string_ref));
 
       if (!STRNCASECMP(string_argv, string_ref, length)) {
@@ -234,22 +232,21 @@ inline float getCmdLineArgumentFloat(const int argc, const char **argv,
     }
   }
 
-  if (bFound) {
+  if (bFound)
     return value;
-  } else {
+  else
     return 0;
-  }
 }
 
-inline bool getCmdLineArgumentString(const int argc, const char **argv,
-                                     const char *string_ref,
-                                     char **string_retval) {
+inline bool getCmdLineArgumentString(const int argc, const char** argv,
+                                     const char* string_ref,
+                                     char** string_retval) {
   bool bFound = false;
 
   if (argc >= 1) {
     for (int i = 1; i < argc; i++) {
       int string_start = stringRemoveDelimiter('-', argv[i]);
-      char *string_argv = const_cast<char *>(&argv[i][string_start]);
+      char* string_argv = const_cast<char*>(&argv[i][string_start]);
       int length = static_cast<int>(strlen(string_ref));
 
       if (!STRNCASECMP(string_argv, string_ref, length)) {
@@ -260,9 +257,8 @@ inline bool getCmdLineArgumentString(const int argc, const char **argv,
     }
   }
 
-  if (!bFound) {
+  if (!bFound)
     *string_retval = NULL;
-  }
 
   return bFound;
 }
@@ -275,8 +271,8 @@ inline bool getCmdLineArgumentString(const int argc, const char **argv,
 //! @param filename         name of the file
 //! @param executable_path  optional absolute path of the executable
 //////////////////////////////////////////////////////////////////////////////
-inline char *sdkFindFilePath(const char *filename,
-                             const char *executable_path) {
+inline char* sdkFindFilePath(const char* filename,
+                             const char* executable_path) {
   // <executable_name> defines a variable that is replaced with the name of the
   // executable
 
@@ -284,77 +280,95 @@ inline char *sdkFindFilePath(const char *filename,
   // input data, or JIT source files) The origin for the relative search may be
   // the .exe file, a .bat file launching an .exe, a browser .exe launching the
   // .exe or .bat, etc
-  const char *searchPath[] = {
-      "./",                                           // same dir
-      "./data/",                                      // same dir
-
-      "../../../../Samples/<executable_name>/",       // up 4 in tree
-      "../../../Samples/<executable_name>/",          // up 3 in tree
-      "../../Samples/<executable_name>/",             // up 2 in tree
-
-      "../../../../Samples/<executable_name>/data/",  // up 4 in tree
-      "../../../Samples/<executable_name>/data/",     // up 3 in tree
-      "../../Samples/<executable_name>/data/",        // up 2 in tree
-
-      "../../../../Samples/0_Introduction/<executable_name>/",  // up 4 in tree
-      "../../../Samples/0_Introduction/<executable_name>/",     // up 3 in tree
-      "../../Samples/0_Introduction/<executable_name>/",        // up 2 in tree
-
-      "../../../../Samples/1_Utilities/<executable_name>/",  // up 4 in tree
-      "../../../Samples/1_Utilities/<executable_name>/",     // up 3 in tree
-      "../../Samples/1_Utilities/<executable_name>/",        // up 2 in tree
-
-      "../../../../Samples/2_Concepts_and_Techniques/<executable_name>/",  // up 4 in tree
-      "../../../Samples/2_Concepts_and_Techniques/<executable_name>/",     // up 3 in tree
-      "../../Samples/2_Concepts_and_Techniques/<executable_name>/",        // up 2 in tree
-
-      "../../../../Samples/3_CUDA_Features/<executable_name>/",  // up 4 in tree
-      "../../../Samples/3_CUDA_Features/<executable_name>/",     // up 3 in tree
-      "../../Samples/3_CUDA_Features/<executable_name>/",        // up 2 in tree
-
-      "../../../../Samples/4_CUDA_Libraries/<executable_name>/",  // up 4 in tree
-      "../../../Samples/4_CUDA_Libraries/<executable_name>/",     // up 3 in tree
-      "../../Samples/4_CUDA_Libraries/<executable_name>/",        // up 2 in tree
-
-      "../../../../Samples/5_Domain_Specific/<executable_name>/",  // up 4 in tree
-      "../../../Samples/5_Domain_Specific/<executable_name>/",     // up 3 in tree
-      "../../Samples/5_Domain_Specific/<executable_name>/",        // up 2 in tree
-
-      "../../../../Samples/6_Performance/<executable_name>/",  // up 4 in tree
-      "../../../Samples/6_Performance/<executable_name>/",     // up 3 in tree
-      "../../Samples/6_Performance/<executable_name>/",        // up 2 in tree
-
-      "../../../../Samples/0_Introduction/<executable_name>/data/",  // up 4 in tree
-      "../../../Samples/0_Introduction/<executable_name>/data/",     // up 3 in tree
-      "../../Samples/0_Introduction/<executable_name>/data/",        // up 2 in tree
-
-      "../../../../Samples/1_Utilities/<executable_name>/data/",  // up 4 in tree
-      "../../../Samples/1_Utilities/<executable_name>/data/",     // up 3 in tree
-      "../../Samples/1_Utilities/<executable_name>/data/",        // up 2 in tree
-
-      "../../../../Samples/2_Concepts_and_Techniques/<executable_name>/data/",  // up 4 in tree
-      "../../../Samples/2_Concepts_and_Techniques/<executable_name>/data/",     // up 3 in tree
-      "../../Samples/2_Concepts_and_Techniques/<executable_name>/data/",        // up 2 in tree
-
-      "../../../../Samples/3_CUDA_Features/<executable_name>/data/",  // up 4 in tree
-      "../../../Samples/3_CUDA_Features/<executable_name>/data/",     // up 3 in tree
-      "../../Samples/3_CUDA_Features/<executable_name>/data/",        // up 2 in tree
-
-      "../../../../Samples/4_CUDA_Libraries/<executable_name>/data/",  // up 4 in tree
-      "../../../Samples/4_CUDA_Libraries/<executable_name>/data/",     // up 3 in tree
-      "../../Samples/4_CUDA_Libraries/<executable_name>/data/",        // up 2 in tree
-
-      "../../../../Samples/5_Domain_Specific/<executable_name>/data/",  // up 4 in tree
-      "../../../Samples/5_Domain_Specific/<executable_name>/data/",     // up 3 in tree
-      "../../Samples/5_Domain_Specific/<executable_name>/data/",        // up 2 in tree
-
-      "../../../../Samples/6_Performance/<executable_name>/data/",  // up 4 in tree
-      "../../../Samples/6_Performance/<executable_name>/data/",     // up 3 in tree
-      "../../Samples/6_Performance/<executable_name>/data/",        // up 2 in tree
-
-      "../../../../Common/data/",                     // up 4 in tree
-      "../../../Common/data/",                        // up 3 in tree
-      "../../Common/data/"                            // up 2 in tree
+  const char* searchPath[] = {
+      "./",      // same dir
+      "./data/", // same dir
+
+      "../../../../Samples/<executable_name>/", // up 4 in tree
+      "../../../Samples/<executable_name>/",    // up 3 in tree
+      "../../Samples/<executable_name>/",       // up 2 in tree
+
+      "../../../../Samples/<executable_name>/data/", // up 4 in tree
+      "../../../Samples/<executable_name>/data/",    // up 3 in tree
+      "../../Samples/<executable_name>/data/",       // up 2 in tree
+
+      "../../../../Samples/0_Introduction/<executable_name>/", // up 4 in tree
+      "../../../Samples/0_Introduction/<executable_name>/",    // up 3 in tree
+      "../../Samples/0_Introduction/<executable_name>/",       // up 2 in tree
+
+      "../../../../Samples/1_Utilities/<executable_name>/", // up 4 in tree
+      "../../../Samples/1_Utilities/<executable_name>/",    // up 3 in tree
+      "../../Samples/1_Utilities/<executable_name>/",       // up 2 in tree
+
+      "../../../../Samples/2_Concepts_and_Techniques/<executable_name>/", // up
+                                                                          // 4
+                                                                          // in
+                                                                          // tree
+      "../../../Samples/2_Concepts_and_Techniques/<executable_name>/", // up 3
+                                                                       // in
+                                                                       // tree
+      "../../Samples/2_Concepts_and_Techniques/<executable_name>/", // up 2 in
+                                                                    // tree
+
+      "../../../../Samples/3_CUDA_Features/<executable_name>/", // up 4 in tree
+      "../../../Samples/3_CUDA_Features/<executable_name>/",    // up 3 in tree
+      "../../Samples/3_CUDA_Features/<executable_name>/",       // up 2 in tree
+
+      "../../../../Samples/4_CUDA_Libraries/<executable_name>/", // up 4 in tree
+      "../../../Samples/4_CUDA_Libraries/<executable_name>/",    // up 3 in tree
+      "../../Samples/4_CUDA_Libraries/<executable_name>/",       // up 2 in tree
+
+      "../../../../Samples/5_Domain_Specific/<executable_name>/", // up 4 in
+                                                                  // tree
+      "../../../Samples/5_Domain_Specific/<executable_name>/", // up 3 in tree
+      "../../Samples/5_Domain_Specific/<executable_name>/",    // up 2 in tree
+
+      "../../../../Samples/6_Performance/<executable_name>/", // up 4 in tree
+      "../../../Samples/6_Performance/<executable_name>/",    // up 3 in tree
+      "../../Samples/6_Performance/<executable_name>/",       // up 2 in tree
+
+      "../../../../Samples/0_Introduction/<executable_name>/data/", // up 4 in
+                                                                    // tree
+      "../../../Samples/0_Introduction/<executable_name>/data/", // up 3 in tree
+      "../../Samples/0_Introduction/<executable_name>/data/",    // up 2 in tree
+
+      "../../../../Samples/1_Utilities/<executable_name>/data/", // up 4 in tree
+      "../../../Samples/1_Utilities/<executable_name>/data/",    // up 3 in tree
+      "../../Samples/1_Utilities/<executable_name>/data/",       // up 2 in tree
+
+      "../../../../Samples/2_Concepts_and_Techniques/<executable_name>/data/", // up 4 in tree
+      "../../../Samples/2_Concepts_and_Techniques/<executable_name>/data/", // up 3 in tree
+      "../../Samples/2_Concepts_and_Techniques/<executable_name>/data/", // up 2
+                                                                         // in
+                                                                         // tree
+
+      "../../../../Samples/3_CUDA_Features/<executable_name>/data/", // up 4 in
+                                                                     // tree
+      "../../../Samples/3_CUDA_Features/<executable_name>/data/",    // up 3 in
+                                                                     // tree
+      "../../Samples/3_CUDA_Features/<executable_name>/data/", // up 2 in tree
+
+      "../../../../Samples/4_CUDA_Libraries/<executable_name>/data/", // up 4 in
+                                                                      // tree
+      "../../../Samples/4_CUDA_Libraries/<executable_name>/data/",    // up 3 in
+                                                                      // tree
+      "../../Samples/4_CUDA_Libraries/<executable_name>/data/", // up 2 in tree
+
+      "../../../../Samples/5_Domain_Specific/<executable_name>/data/", // up 4
+                                                                       // in
+                                                                       // tree
+      "../../../Samples/5_Domain_Specific/<executable_name>/data/", // up 3 in
+                                                                    // tree
+      "../../Samples/5_Domain_Specific/<executable_name>/data/", // up 2 in tree
+
+      "../../../../Samples/6_Performance/<executable_name>/data/", // up 4 in
+                                                                   // tree
+      "../../../Samples/6_Performance/<executable_name>/data/", // up 3 in tree
+      "../../Samples/6_Performance/<executable_name>/data/",    // up 2 in tree
+
+      "../../../../Common/data/", // up 4 in tree
+      "../../../Common/data/",    // up 3 in tree
+      "../../Common/data/"        // up 2 in tree
   };
 
   // Extract the executable name
@@ -381,7 +395,7 @@ inline char *sdkFindFilePath(const char *filename,
   }
 
   // Loop over all search paths and return the first hit
-  for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char *); ++i) {
+  for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char*); ++i) {
     std::string path(searchPath[i]);
     size_t executable_name_pos = path.find("<executable_name>");
 
@@ -403,21 +417,20 @@ inline char *sdkFindFilePath(const char *filename,
 
     // Test if the file exists
     path.append(filename);
-    FILE *fp;
+    FILE* fp;
     FOPEN(fp, path.c_str(), "rb");
 
     if (fp != NULL) {
       fclose(fp);
       // File found
       // returning an allocated array here for backwards compatibility reasons
-      char *file_path = reinterpret_cast<char *>(malloc(path.length() + 1));
+      char* file_path = reinterpret_cast<char*>(malloc(path.length() + 1));
       STRCPY(file_path, path.length() + 1, path.c_str());
       return file_path;
     }
 
-    if (fp) {
+    if (fp)
       fclose(fp);
-    }
   }
 
   // File not found
@@ -425,4 +438,4 @@ inline char *sdkFindFilePath(const char *filename,
   return 0;
 }
 
-#endif  // COMMON_HELPER_STRING_H_
+#endif // COMMON_HELPER_STRING_H_
diff --git a/demos/CUDA/BlackScholes/helper/helper_timer.h b/demos/CUDA/BlackScholes/helper/helper_timer.h
index 5aceaf8a9..3869bf8ca 100644
--- a/demos/CUDA/BlackScholes/helper/helper_timer.h
+++ b/demos/CUDA/BlackScholes/helper/helper_timer.h
@@ -42,11 +42,11 @@
 // Definition of the StopWatch Interface, this is used if we don't want to use
 // the CUT functions But rather in a self contained class interface
 class StopWatchInterface {
- public:
+public:
   StopWatchInterface() {}
   virtual ~StopWatchInterface() {}
 
- public:
+public:
   //! Start time measurement
   virtual void start() = 0;
 
@@ -78,23 +78,17 @@ class StopWatchInterface {
 
 //! Windows specific implementation of StopWatch
 class StopWatchWin : public StopWatchInterface {
- public:
+public:
   //! Constructor, default
   StopWatchWin()
-      : start_time(),
-        end_time(),
-        diff_time(0.0f),
-        total_time(0.0f),
-        running(false),
-        clock_sessions(0),
-        freq(0),
-        freq_set(false) {
+      : start_time(), end_time(), diff_time(0.0f), total_time(0.0f),
+        running(false), clock_sessions(0), freq(0), freq_set(false) {
     if (!freq_set) {
       // helper variable
       LARGE_INTEGER temp;
 
       // get the tick frequency from the OS
-      QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER *>(&temp));
+      QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER*>(&temp));
 
       // convert to type in which it is needed
       freq = (static_cast<double>(temp.QuadPart)) / 1000.0;
@@ -107,7 +101,7 @@ class StopWatchWin : public StopWatchInterface {
   // Destructor
   ~StopWatchWin() {}
 
- public:
+public:
   //! Start time measurement
   inline void start();
 
@@ -126,7 +120,7 @@ class StopWatchWin : public StopWatchInterface {
   //! _stopped_ (ie finished sessions) and the current total time
   inline float getAverageTime();
 
- private:
+private:
   // member variables
 
   //! Start of measurement
@@ -160,7 +154,7 @@ class StopWatchWin : public StopWatchInterface {
 //! Start time measurement
 ////////////////////////////////////////////////////////////////////////////////
 inline void StopWatchWin::start() {
-  QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&start_time));
+  QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER*>(&start_time));
   running = true;
 }
 
@@ -169,7 +163,7 @@ inline void StopWatchWin::start() {
 //! variable. Also increment the number of times this clock has been run.
 ////////////////////////////////////////////////////////////////////////////////
 inline void StopWatchWin::stop() {
-  QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&end_time));
+  QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER*>(&end_time));
   diff_time = static_cast<float>(((static_cast<double>(end_time.QuadPart) -
                                    static_cast<double>(start_time.QuadPart)) /
                                   freq));
@@ -188,9 +182,8 @@ inline void StopWatchWin::reset() {
   total_time = 0;
   clock_sessions = 0;
 
-  if (running) {
-    QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&start_time));
-  }
+  if (running)
+    QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER*>(&start_time));
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -205,7 +198,7 @@ inline float StopWatchWin::getTime() {
 
   if (running) {
     LARGE_INTEGER temp;
-    QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&temp));
+    QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER*>(&temp));
     retval += static_cast<float>(((static_cast<double>(temp.QuadPart) -
                                    static_cast<double>(start_time.QuadPart)) /
                                   freq));
@@ -224,24 +217,21 @@ inline float StopWatchWin::getAverageTime() {
 #else
 // Declarations for Stopwatch on Linux and Mac OSX
 // includes, system
-#include <sys/time.h>
 #include <ctime>
+#include <sys/time.h>
 
 //! Windows specific implementation of StopWatch
 class StopWatchLinux : public StopWatchInterface {
- public:
+public:
   //! Constructor, default
   StopWatchLinux()
-      : start_time(),
-        diff_time(0.0),
-        total_time(0.0),
-        running(false),
+      : start_time(), diff_time(0.0), total_time(0.0), running(false),
         clock_sessions(0) {}
 
   // Destructor
   virtual ~StopWatchLinux() {}
 
- public:
+public:
   //! Start time measurement
   inline void start();
 
@@ -260,13 +250,13 @@ class StopWatchLinux : public StopWatchInterface {
   //! _stopped_ (ie finished sessions) and the current total time
   inline float getAverageTime();
 
- private:
+private:
   // helper functions
 
   //! Get difference between start time and current time
   inline float getDiffTime();
 
- private:
+private:
   // member variables
 
   //! Start of measurement
@@ -316,9 +306,8 @@ inline void StopWatchLinux::reset() {
   total_time = 0;
   clock_sessions = 0;
 
-  if (running) {
+  if (running)
     gettimeofday(&start_time, 0);
-  }
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -331,9 +320,8 @@ inline float StopWatchLinux::getTime() {
   // Return the TOTAL time to date
   float retval = total_time;
 
-  if (running) {
+  if (running)
     retval += getDiffTime();
-  }
 
   return retval;
 }
@@ -356,7 +344,7 @@ inline float StopWatchLinux::getDiffTime() {
   return static_cast<float>(1000.0 * (t_time.tv_sec - start_time.tv_sec) +
                             (0.001 * (t_time.tv_usec - start_time.tv_usec)));
 }
-#endif  // WIN32
+#endif // WIN32
 
 ////////////////////////////////////////////////////////////////////////////////
 //! Timer functionality exported
@@ -366,13 +354,13 @@ inline float StopWatchLinux::getDiffTime() {
 //! @return true if a time has been created, otherwise false
 //! @param  name of the new timer, 0 if the creation failed
 ////////////////////////////////////////////////////////////////////////////////
-inline bool sdkCreateTimer(StopWatchInterface **timer_interface) {
+inline bool sdkCreateTimer(StopWatchInterface** timer_interface) {
 // printf("sdkCreateTimer called object %08x\n", (void *)*timer_interface);
 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-  *timer_interface = reinterpret_cast<StopWatchInterface *>(new StopWatchWin());
+  *timer_interface = reinterpret_cast<StopWatchInterface*>(new StopWatchWin());
 #else
   *timer_interface =
-      reinterpret_cast<StopWatchInterface *>(new StopWatchLinux());
+      reinterpret_cast<StopWatchInterface*>(new StopWatchLinux());
 #endif
   return (*timer_interface != NULL) ? true : false;
 }
@@ -382,7 +370,7 @@ inline bool sdkCreateTimer(StopWatchInterface **timer_interface) {
 //! @return true if a time has been deleted, otherwise false
 //! @param  name of the timer to delete
 ////////////////////////////////////////////////////////////////////////////////
-inline bool sdkDeleteTimer(StopWatchInterface **timer_interface) {
+inline bool sdkDeleteTimer(StopWatchInterface** timer_interface) {
   // printf("sdkDeleteTimer called object %08x\n", (void *)*timer_interface);
   if (*timer_interface) {
     delete *timer_interface;
@@ -396,11 +384,10 @@ inline bool sdkDeleteTimer(StopWatchInterface **timer_interface) {
 //! Start the time with name \a name
 //! @param name  name of the timer to start
 ////////////////////////////////////////////////////////////////////////////////
-inline bool sdkStartTimer(StopWatchInterface **timer_interface) {
+inline bool sdkStartTimer(StopWatchInterface** timer_interface) {
   // printf("sdkStartTimer called object %08x\n", (void *)*timer_interface);
-  if (*timer_interface) {
+  if (*timer_interface)
     (*timer_interface)->start();
-  }
 
   return true;
 }
@@ -409,11 +396,10 @@ inline bool sdkStartTimer(StopWatchInterface **timer_interface) {
 //! Stop the time with name \a name. Does not reset.
 //! @param name  name of the timer to stop
 ////////////////////////////////////////////////////////////////////////////////
-inline bool sdkStopTimer(StopWatchInterface **timer_interface) {
+inline bool sdkStopTimer(StopWatchInterface** timer_interface) {
   // printf("sdkStopTimer called object %08x\n", (void *)*timer_interface);
-  if (*timer_interface) {
+  if (*timer_interface)
     (*timer_interface)->stop();
-  }
 
   return true;
 }
@@ -422,11 +408,10 @@ inline bool sdkStopTimer(StopWatchInterface **timer_interface) {
 //! Resets the timer's counter.
 //! @param name  name of the timer to reset.
 ////////////////////////////////////////////////////////////////////////////////
-inline bool sdkResetTimer(StopWatchInterface **timer_interface) {
+inline bool sdkResetTimer(StopWatchInterface** timer_interface) {
   // printf("sdkResetTimer called object %08x\n", (void *)*timer_interface);
-  if (*timer_interface) {
+  if (*timer_interface)
     (*timer_interface)->reset();
-  }
 
   return true;
 }
@@ -438,14 +423,13 @@ inline bool sdkResetTimer(StopWatchInterface **timer_interface) {
 //! Excludes the current running time if the timer is currently running.
 //! @param name  name of the timer to return the time of
 ////////////////////////////////////////////////////////////////////////////////
-inline float sdkGetAverageTimerValue(StopWatchInterface **timer_interface) {
+inline float sdkGetAverageTimerValue(StopWatchInterface** timer_interface) {
   //  printf("sdkGetAverageTimerValue called object %08x\n", (void
   //  *)*timer_interface);
-  if (*timer_interface) {
+  if (*timer_interface)
     return (*timer_interface)->getAverageTime();
-  } else {
+  else
     return 0.0f;
-  }
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -453,13 +437,12 @@ inline float sdkGetAverageTimerValue(StopWatchInterface **timer_interface) {
 //! or timer creation.
 //! @param name  name of the timer to obtain the value of.
 ////////////////////////////////////////////////////////////////////////////////
-inline float sdkGetTimerValue(StopWatchInterface **timer_interface) {
+inline float sdkGetTimerValue(StopWatchInterface** timer_interface) {
   // printf("sdkGetTimerValue called object %08x\n", (void *)*timer_interface);
-  if (*timer_interface) {
+  if (*timer_interface)
     return (*timer_interface)->getTime();
-  } else {
+  else
     return 0.0f;
-  }
 }
 
-#endif  // COMMON_HELPER_TIMER_H_
+#endif // COMMON_HELPER_TIMER_H_
diff --git a/demos/CUDA/TensorContraction.cu b/demos/CUDA/TensorContraction.cu
index 8e1efaf9b..9d324a065 100644
--- a/demos/CUDA/TensorContraction.cu
+++ b/demos/CUDA/TensorContraction.cu
@@ -17,236 +17,250 @@
 
 typedef unsigned long long int size_type;
 
-__device__ void computeStartStep(size_type& A_start, size_type& A_step, size_type& B_start, size_type& B_step, const int idx, const size_type A_dim[3], const size_type B_dim[3], const int contractDimA, const int contractDimB) {
-    size_type A_a, A_b, A_c, B_d, B_e, B_f;
+__device__ void computeStartStep(size_type& A_start, size_type& A_step,
+                                 size_type& B_start, size_type& B_step,
+                                 const int idx, const size_type A_dim[3],
+                                 const size_type B_dim[3],
+                                 const int contractDimA,
+                                 const int contractDimB) {
+  size_type A_a, A_b, A_c, B_d, B_e, B_f;
 
-    switch (contractDimA) {
-        case 0:
-          A_b = idx / (A_dim[2] * B_dim[(contractDimB + 1) % 3] * B_dim[(contractDimB + 2) % 3]);
-          A_c = (idx / (B_dim[(contractDimB + 1) % 3] * B_dim[(contractDimB + 2) % 3])) % A_dim[2];
-          A_start = 0 + A_b * A_dim[2] + A_c;
-          A_step = A_dim[1] * A_dim[2];
-          break;
-        case 1:
-          A_a = idx / (A_dim[2] * B_dim[(contractDimB + 1) % 3] * B_dim[(contractDimB + 2) % 3]);
-          A_c = (idx / (B_dim[(contractDimB + 1) % 3] * B_dim[(contractDimB + 2) % 3])) % A_dim[2];
-          A_start = A_a * A_dim[1] * A_dim[2] + 0 + A_c;
-          A_step = A_dim[2];
-          break;
-        case 2:
-          A_a = idx / (A_dim[1] * B_dim[(contractDimB + 1) % 3] * B_dim[(contractDimB + 2) % 3]);
-          A_b = (idx / (B_dim[(contractDimB + 1) % 3] * B_dim[(contractDimB + 2) % 3])) % A_dim[1];
-          A_start = A_a * A_dim[1] * A_dim[2] + A_b * A_dim[2];
-          A_step = 1;
-          break;
-    }
+  switch (contractDimA) {
+  case 0:
+    A_b = idx / (A_dim[2] * B_dim[(contractDimB + 1) % 3] *
+                 B_dim[(contractDimB + 2) % 3]);
+    A_c = (idx /
+           (B_dim[(contractDimB + 1) % 3] * B_dim[(contractDimB + 2) % 3])) %
+          A_dim[2];
+    A_start = 0 + A_b * A_dim[2] + A_c;
+    A_step = A_dim[1] * A_dim[2];
+    break;
+  case 1:
+    A_a = idx / (A_dim[2] * B_dim[(contractDimB + 1) % 3] *
+                 B_dim[(contractDimB + 2) % 3]);
+    A_c = (idx /
+           (B_dim[(contractDimB + 1) % 3] * B_dim[(contractDimB + 2) % 3])) %
+          A_dim[2];
+    A_start = A_a * A_dim[1] * A_dim[2] + 0 + A_c;
+    A_step = A_dim[2];
+    break;
+  case 2:
+    A_a = idx / (A_dim[1] * B_dim[(contractDimB + 1) % 3] *
+                 B_dim[(contractDimB + 2) % 3]);
+    A_b = (idx /
+           (B_dim[(contractDimB + 1) % 3] * B_dim[(contractDimB + 2) % 3])) %
+          A_dim[1];
+    A_start = A_a * A_dim[1] * A_dim[2] + A_b * A_dim[2];
+    A_step = 1;
+    break;
+  }
 
-    switch (contractDimB) {
-        case 0:
-          B_e = (idx / B_dim[2]) % B_dim[1];
-          B_f = idx % B_dim[2];
-          B_start = 0 + B_e * B_dim[2] + B_f;
-          B_step = B_dim[1] * B_dim[2];
-          break;
-        case 1:
-          B_d = (idx / B_dim[2]) % B_dim[0];
-          B_f = idx % B_dim[2];
-          B_start = B_d * B_dim[2] * B_dim[1] + 0 + B_f;
-          B_step = B_dim[2];
-          break;
-        case 2:
-          B_d = (idx / B_dim[1]) % B_dim[0];
-          B_e = idx % B_dim[1];
-          B_start = B_d * B_dim[2] * B_dim[1] + B_e * B_dim[2];
-          B_step = 1;
-          break;
-    }
+  switch (contractDimB) {
+  case 0:
+    B_e = (idx / B_dim[2]) % B_dim[1];
+    B_f = idx % B_dim[2];
+    B_start = 0 + B_e * B_dim[2] + B_f;
+    B_step = B_dim[1] * B_dim[2];
+    break;
+  case 1:
+    B_d = (idx / B_dim[2]) % B_dim[0];
+    B_f = idx % B_dim[2];
+    B_start = B_d * B_dim[2] * B_dim[1] + 0 + B_f;
+    B_step = B_dim[2];
+    break;
+  case 2:
+    B_d = (idx / B_dim[1]) % B_dim[0];
+    B_e = idx % B_dim[1];
+    B_start = B_d * B_dim[2] * B_dim[1] + B_e * B_dim[2];
+    B_step = 1;
+    break;
+  }
 }
 
-__global__ void tensorContraction3D(float* C, const float *A, const float *B, const size_type *A_dim, const size_type *B_dim, const int contractDimA, const int contractDimB) {
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+__global__ void tensorContraction3D(float* C, const float* A, const float* B,
+                                    const size_type* A_dim,
+                                    const size_type* B_dim,
+                                    const int contractDimA,
+                                    const int contractDimB) {
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
 
-    // Each thread computes one element of the output tensor
-    int totalElements = A_dim[(contractDimA + 1) % 3] * A_dim[(contractDimA + 2) % 3] * B_dim[(contractDimB + 1) % 3] * B_dim[(contractDimB + 2) % 3];
-    if (idx < totalElements) {
-      size_type A_start, B_start, A_step, B_step;
-      size_type A_a, A_b, A_c, B_d, B_e, B_f;
+  // Each thread computes one element of the output tensor
+  int totalElements =
+      A_dim[(contractDimA + 1) % 3] * A_dim[(contractDimA + 2) % 3] *
+      B_dim[(contractDimB + 1) % 3] * B_dim[(contractDimB + 2) % 3];
+  if (idx < totalElements) {
+    size_type A_start, B_start, A_step, B_step;
+    size_type A_a, A_b, A_c, B_d, B_e, B_f;
 
-      computeStartStep(A_start, A_step, B_start, B_step, idx, A_dim, B_dim, contractDimA, contractDimB);
-    
-      float sum = 0.0f;
-      for (int i = 0; i < A_dim[contractDimA]; i++) { // A_dim[contractDimA] == B_dim[contractDimB]
-          sum += A[A_start + (i * A_step)] * B[B_start + (i * B_step)];
-      }
+    computeStartStep(A_start, A_step, B_start, B_step, idx, A_dim, B_dim,
+                     contractDimA, contractDimB);
 
-      C[idx] = sum;
-    }
+    float sum = 0.0f;
+    for (int i = 0; i < A_dim[contractDimA];
+         i++) // A_dim[contractDimA] == B_dim[contractDimB]
+      sum += A[A_start + (i * A_step)] * B[B_start + (i * B_step)];
+
+    C[idx] = sum;
+  }
 }
 
-void launchTensorContraction3D(float* C, const float* A, const float* B, const size_type D1, const size_type D2, const size_type D3, const size_type D4, const size_type D5) {
-    float *d_A = nullptr, *d_B = nullptr, *d_C = nullptr;
-
-    const size_type A_size = D1 * D2 * D3 * sizeof(float);
-    const size_type B_size = D3 * D4 * D5 * sizeof(float);
-    const size_type C_size = D1 * D2 * D4 * D5 * sizeof(float);
-
-    // Allocate device memory and copy data from host to device
-    cudaMalloc(&d_A, A_size);
-    cudaMalloc(&d_B, B_size);
-    cudaMalloc(&d_C, C_size);
-    cudaMemcpy(d_A, A, A_size, cudaMemcpyHostToDevice);
-    cudaMemcpy(d_B, B, B_size, cudaMemcpyHostToDevice);
-
-    const size_type A_dim[3] = {D1, D2, D3};
-    const size_type B_dim[3] = {D3, D4, D5};
-
-    size_type *d_A_dim = nullptr, *d_B_dim = nullptr;
-    cudaMalloc(&d_A_dim, 3 * sizeof(size_type));
-    cudaMalloc(&d_B_dim, 3 * sizeof(size_type));
-    cudaMemcpy(d_A_dim, A_dim, 3 * sizeof(size_type), cudaMemcpyHostToDevice);
-    cudaMemcpy(d_B_dim, B_dim, 3 * sizeof(size_type), cudaMemcpyHostToDevice);
-
-    // Launch the kernel
-    tensorContraction3D<<<1, 256>>>(d_C, d_A, d_B, d_A_dim, d_B_dim, /*contractDimA=*/2, /*contractDimB=*/0);
-
-    // Copy the result from device to host
-    cudaMemcpy(C, d_C, C_size, cudaMemcpyDeviceToHost);
-
-    // Free device memory
-    cudaFree(d_A);
-    cudaFree(d_B);
-    cudaFree(d_C);
-    cudaFree(d_A_dim);
-    cudaFree(d_B_dim);
+void launchTensorContraction3D(float* C, const float* A, const float* B,
+                               const size_type D1, const size_type D2,
+                               const size_type D3, const size_type D4,
+                               const size_type D5) {
+  float *d_A = nullptr, *d_B = nullptr, *d_C = nullptr;
+
+  const size_type A_size = D1 * D2 * D3 * sizeof(float);
+  const size_type B_size = D3 * D4 * D5 * sizeof(float);
+  const size_type C_size = D1 * D2 * D4 * D5 * sizeof(float);
+
+  // Allocate device memory and copy data from host to device
+  cudaMalloc(&d_A, A_size);
+  cudaMalloc(&d_B, B_size);
+  cudaMalloc(&d_C, C_size);
+  cudaMemcpy(d_A, A, A_size, cudaMemcpyHostToDevice);
+  cudaMemcpy(d_B, B, B_size, cudaMemcpyHostToDevice);
+
+  const size_type A_dim[3] = {D1, D2, D3};
+  const size_type B_dim[3] = {D3, D4, D5};
+
+  size_type *d_A_dim = nullptr, *d_B_dim = nullptr;
+  cudaMalloc(&d_A_dim, 3 * sizeof(size_type));
+  cudaMalloc(&d_B_dim, 3 * sizeof(size_type));
+  cudaMemcpy(d_A_dim, A_dim, 3 * sizeof(size_type), cudaMemcpyHostToDevice);
+  cudaMemcpy(d_B_dim, B_dim, 3 * sizeof(size_type), cudaMemcpyHostToDevice);
+
+  // Launch the kernel
+  tensorContraction3D<<<1, 256>>>(d_C, d_A, d_B, d_A_dim, d_B_dim,
+                                  /*contractDimA=*/2, /*contractDimB=*/0);
+
+  // Copy the result from device to host
+  cudaMemcpy(C, d_C, C_size, cudaMemcpyDeviceToHost);
+
+  // Free device memory
+  cudaFree(d_A);
+  cudaFree(d_B);
+  cudaFree(d_C);
+  cudaFree(d_A_dim);
+  cudaFree(d_B_dim);
 }
 
 int main() {
-    const size_type D1 = 2, D2 = 3, D3 = 4, D4 = 3, D5 = 2;
-    
-    float A[D1][D2][D3] = {
-        { {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12} },
-        { {13, 14, 15, 16}, {17, 18, 19, 20}, {21, 22, 23, 24} }
-    };
-
-    float B[D3][D4][D5] = {
-        { {1, 2}, {3, 4}, {5, 6} },
-        { {7, 8}, {9, 10}, {11, 12} },
-        { {13, 14}, {15, 16}, {17, 18} },
-        { {19, 20}, {21, 22}, {23, 24} }
-    };
-
-    float C[D1][D2][D4][D5] = {0};  // Result tensor
-
-    // Compute the gradient
-    auto tensor_grad = clad::gradient(launchTensorContraction3D, "C, A, B");
-
-    // Initialize the gradient inputs
-    float gradC[D1][D2][D4][D5] = {
-        {
-            { {1, 1}, {1, 1}, {1, 1} }, 
-            { {1, 1}, {1, 1}, {1, 1} },
-            { {1, 1}, {1, 1}, {1, 1} }
-        },
-        {
-            { {1, 1}, {1, 1}, {1, 1} },
-            { {1, 1}, {1, 1}, {1, 1} },
-            { {1, 1}, {1, 1}, {1, 1} }
-        }
-    };
-    float gradA[D1][D2][D3] = {0};
-    float gradB[D3][D4][D5] = {0};
-
-    // Execute tensor contraction and its gradient
-    tensor_grad.execute(&C[0][0][0][0], &A[0][0][0], &B[0][0][0], D1, D2, D3, D4, D5, &gradC[0][0][0][0], &gradA[0][0][0], &gradB[0][0][0]);
-
-    // Print the result
-    std::cout << "Result C:\n";
-    for (int i = 0; i < D1; ++i) {
-        for (int j = 0; j < D2; ++j) {
-            for (int k = 0; k < D4; ++k) {
-                for (int l = 0; l < D5; ++l) {
-                    std::cout << C[i][j][k][l] << " ";
-                }
-                std::cout << "\n";
-            }
-            std::cout << "\n";
-        }
+  const size_type D1 = 2, D2 = 3, D3 = 4, D4 = 3, D5 = 2;
+
+  float A[D1][D2][D3] = {
+      {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}},
+      {{13, 14, 15, 16}, {17, 18, 19, 20}, {21, 22, 23, 24}}};
+
+  float B[D3][D4][D5] = {{{1, 2}, {3, 4}, {5, 6}},
+                         {{7, 8}, {9, 10}, {11, 12}},
+                         {{13, 14}, {15, 16}, {17, 18}},
+                         {{19, 20}, {21, 22}, {23, 24}}};
+
+  float C[D1][D2][D4][D5] = {0}; // Result tensor
+
+  // Compute the gradient
+  auto tensor_grad = clad::gradient(launchTensorContraction3D, "C, A, B");
+
+  // Initialize the gradient inputs
+  float gradC[D1][D2][D4][D5] = {{{{1, 1}, {1, 1}, {1, 1}},
+                                  {{1, 1}, {1, 1}, {1, 1}},
+                                  {{1, 1}, {1, 1}, {1, 1}}},
+                                 {{{1, 1}, {1, 1}, {1, 1}},
+                                  {{1, 1}, {1, 1}, {1, 1}},
+                                  {{1, 1}, {1, 1}, {1, 1}}}};
+  float gradA[D1][D2][D3] = {0};
+  float gradB[D3][D4][D5] = {0};
+
+  // Execute tensor contraction and its gradient
+  tensor_grad.execute(&C[0][0][0][0], &A[0][0][0], &B[0][0][0], D1, D2, D3, D4,
+                      D5, &gradC[0][0][0][0], &gradA[0][0][0], &gradB[0][0][0]);
+
+  // Print the result
+  std::cout << "Result C:\n";
+  for (int i = 0; i < D1; ++i) {
+    for (int j = 0; j < D2; ++j) {
+      for (int k = 0; k < D4; ++k) {
+        for (int l = 0; l < D5; ++l)
+          std::cout << C[i][j][k][l] << " ";
         std::cout << "\n";
+      }
+      std::cout << "\n";
     }
-    
-    std::cout << "Result C_grad w.r.t. A:\n";
-    for (int i = 0; i < D1; ++i) {
-        for (int j = 0; j < D2; ++j) {
-            for (int k = 0; k < D3; ++k) {
-                std::cout << gradA[i][j][k] << " ";
-            }
-            std::cout << "\n";
-        }
-        std::cout << "\n";
+    std::cout << "\n";
+  }
+
+  std::cout << "Result C_grad w.r.t. A:\n";
+  for (int i = 0; i < D1; ++i) {
+    for (int j = 0; j < D2; ++j) {
+      for (int k = 0; k < D3; ++k)
+        std::cout << gradA[i][j][k] << " ";
+      std::cout << "\n";
     }
+    std::cout << "\n";
+  }
 
-    std::cout << "Result C_grad w.r.t. B:\n";
-    for (int i = 0; i < D3; ++i) {
-        for (int j = 0; j < D4; ++j) {
-            for (int k = 0; k < D5; ++k) {
-                std::cout << gradB[i][j][k] << " ";
-            }
-            std::cout << "\n";
-        }
-        std::cout << "\n";
+  std::cout << "Result C_grad w.r.t. B:\n";
+  for (int i = 0; i < D3; ++i) {
+    for (int j = 0; j < D4; ++j) {
+      for (int k = 0; k < D5; ++k)
+        std::cout << gradB[i][j][k] << " ";
+      std::cout << "\n";
     }
+    std::cout << "\n";
+  }
 
-    return 0;
+  return 0;
 }
 
 // CHECK-EXEC: Result C:
-// CHECK-NEXT: 130 140 
-// CHECK-NEXT: 150 160 
-// CHECK-NEXT: 170 180 
+// CHECK-NEXT: 130 140
+// CHECK-NEXT: 150 160
+// CHECK-NEXT: 170 180
 // CHECK-NEXT:
-// CHECK-NEXT: 290 316 
-// CHECK-NEXT: 342 368 
-// CHECK-NEXT: 394 420 
+// CHECK-NEXT: 290 316
+// CHECK-NEXT: 342 368
+// CHECK-NEXT: 394 420
 // CHECK-NEXT:
-// CHECK-NEXT: 450 492 
-// CHECK-NEXT: 534 576 
-// CHECK-NEXT: 618 660 
+// CHECK-NEXT: 450 492
+// CHECK-NEXT: 534 576
+// CHECK-NEXT: 618 660
 // CHECK-NEXT:
 // CHECK-NEXT:
-// CHECK-NEXT: 610 668 
-// CHECK-NEXT: 726 784 
-// CHECK-NEXT: 842 900 
+// CHECK-NEXT: 610 668
+// CHECK-NEXT: 726 784
+// CHECK-NEXT: 842 900
 // CHECK-NEXT:
-// CHECK-NEXT: 770 844 
-// CHECK-NEXT: 918 992 
-// CHECK-NEXT: 1066 1140 
+// CHECK-NEXT: 770 844
+// CHECK-NEXT: 918 992
+// CHECK-NEXT: 1066 1140
 // CHECK-NEXT:
-// CHECK-NEXT: 930 1020 
-// CHECK-NEXT: 1110 1200 
-// CHECK-NEXT: 1290 1380 
+// CHECK-NEXT: 930 1020
+// CHECK-NEXT: 1110 1200
+// CHECK-NEXT: 1290 1380
 
 // CHECK-EXEC: Result C_grad w.r.t. A:
-// CHECK-NEXT: 21 57 93 129 
-// CHECK-NEXT: 21 57 93 129 
-// CHECK-NEXT: 21 57 93 129 
+// CHECK-NEXT: 21 57 93 129
+// CHECK-NEXT: 21 57 93 129
+// CHECK-NEXT: 21 57 93 129
 // CHECK-NEXT:
-// CHECK-NEXT: 21 57 93 129 
-// CHECK-NEXT: 21 57 93 129 
-// CHECK-NEXT: 21 57 93 129 
+// CHECK-NEXT: 21 57 93 129
+// CHECK-NEXT: 21 57 93 129
+// CHECK-NEXT: 21 57 93 129
 // CHECK-NEXT:
 // CHECK-EXEC: Result C_grad w.r.t. B:
-// CHECK-NEXT: 66 66 
-// CHECK-NEXT: 66 66 
-// CHECK-NEXT: 66 66 
+// CHECK-NEXT: 66 66
+// CHECK-NEXT: 66 66
+// CHECK-NEXT: 66 66
 // CHECK-NEXT:
-// CHECK-NEXT: 72 72 
-// CHECK-NEXT: 72 72 
-// CHECK-NEXT: 72 72 
+// CHECK-NEXT: 72 72
+// CHECK-NEXT: 72 72
+// CHECK-NEXT: 72 72
 // CHECK-NEXT:
-// CHECK-NEXT: 78 78 
-// CHECK-NEXT: 78 78 
-// CHECK-NEXT: 78 78 
+// CHECK-NEXT: 78 78
+// CHECK-NEXT: 78 78
+// CHECK-NEXT: 78 78
 // CHECK-NEXT:
-// CHECK-NEXT: 84 84 
-// CHECK-NEXT: 84 84 
-// CHECK-NEXT: 84 84 
\ No newline at end of file
+// CHECK-NEXT: 84 84
+// CHECK-NEXT: 84 84
+// CHECK-NEXT: 84 84
\ No newline at end of file