From 53d4ea1f5256bbca2430f241a0aec9afe93b5991 Mon Sep 17 00:00:00 2001
From: Matthew Parno <mparno@solea.com>
Date: Tue, 27 Feb 2024 09:08:04 -0500
Subject: [PATCH 1/8] Fixed pickling of pytorch wrapper.

---
 bindings/python/package/torch.py           | 120 ++-------------------
 bindings/python/package/torch_helpers.py   | 115 ++++++++++++++++++++
 bindings/python/tests/test_TorchWrapper.py |  35 ++++--
 3 files changed, 147 insertions(+), 123 deletions(-)
 create mode 100644 bindings/python/package/torch_helpers.py

diff --git a/bindings/python/package/torch.py b/bindings/python/package/torch.py
index 166a5ecc..d2a1334e 100644
--- a/bindings/python/package/torch.py
+++ b/bindings/python/package/torch.py
@@ -1,115 +1,7 @@
 import torch
+import dill 
 
-def ExtractTorchTensorData(tensor):
-    """ Extracts the pointer, shape, and stride from a pytorch tensor and returns a tuple
-        that can be passed to MParT functions that have been overloaded to accept
-        (double*, std::tuple<int,int>, std::tuple<int,int>) instead of a Kokkos::View.
-
-        Arguments:
-        ------------
-        tensor: pytorch.Tensor
-            The pytorch tensor we want to eventually wrap with a Kokkos view.
-        
-        Returns:
-        ------------
-        Tuple[int, Tuple[int,int], Tuple[int,int]]
-            A python tuple that contains all information needed to construct a Kokkos::View.
-            After casting to c++ types using pybind, this output can be passed to the 
-            mpart::ConstructViewFromPointer function.
-    """
-
-    # Make sure the tensor has double data type
-    if tensor.dtype != torch.float64:
-        raise ValueError(f'Currently only tensors with float64 datatype can be converted.  Current dtype is {tensor.dtype}')
-    
-    if len(tensor.shape)==1:
-        return tensor.data_ptr(), tensor.shape[0], tensor.stride()[0]
-    elif len(tensor.shape)==2:
-        return tensor.data_ptr(), tuple(tensor.shape), tuple(tensor.stride())
-    else:
-        raise ValueError(f'Currently only 1d and 2d tensors can be converted.')
-
-
-class MpartTorchAutograd(torch.autograd.Function):
-
-    @staticmethod
-    def forward(ctx, input, coeffs, f, return_logdet):
-        ctx.save_for_backward(input, coeffs)
-        ctx.f = f
-
-        coeffs_dbl = None 
-        if coeffs is not None:
-            coeffs_dbl = coeffs.double()
-            f.WrapCoeffs(ExtractTorchTensorData(coeffs_dbl))
-        input_dbl = input.double()
-
-        output = torch.zeros(f.outputDim, input.shape[1], dtype=torch.double) 
-        f.EvaluateImpl(ExtractTorchTensorData(input_dbl), ExtractTorchTensorData(output))
-
-        if return_logdet:
-            logdet = torch.zeros(input.shape[1], dtype=torch.double)
-            f.LogDeterminantImpl(ExtractTorchTensorData(input_dbl), ExtractTorchTensorData(logdet))
-            return output.type(input.dtype), logdet.type(input.dtype)
-        else:
-            return output.type(input.dtype)
-
-    @staticmethod
-    def backward(ctx, output_sens, logdet_sens=None):
-        input, coeffs = ctx.saved_tensors
-        f = ctx.f
-        
-        coeffs_dbl = None 
-        if coeffs is not None:
-            coeffs_dbl = coeffs.double()
-            f.WrapCoeffs(ExtractTorchTensorData(coeffs_dbl))
-        input_dbl = input.double()
-        output_sens_dbl = output_sens.double()
-
-        logdet_sens_dbl = None 
-        if logdet_sens is not None:
-            logdet_sens_dbl = logdet_sens.double()
-
-        # Get the gradient wrt input 
-        grad = None 
-        if input.requires_grad:          
-            grad = torch.zeros(f.inputDim, input.shape[1], dtype=torch.double)
-
-            f.GradientImpl(ExtractTorchTensorData(input_dbl), 
-                          ExtractTorchTensorData(output_sens_dbl),
-                          ExtractTorchTensorData(grad))
-
-            if logdet_sens is not None:
-                grad2 = torch.zeros(f.inputDim, input.shape[1], dtype=torch.double)
-
-                f.LogDeterminantInputGradImpl(ExtractTorchTensorData(input_dbl), 
-                                              ExtractTorchTensorData(grad2))
-                grad += grad2*logdet_sens_dbl[None,:]
-
-        coeff_grad = None
-        if coeffs is not None:
-            if coeffs.requires_grad:
-                coeff_grad = torch.zeros(f.numCoeffs, input.shape[1], dtype=torch.double)
-                f.CoeffGradImpl(ExtractTorchTensorData(input_dbl),
-                                ExtractTorchTensorData(output_sens_dbl),
-                                ExtractTorchTensorData(coeff_grad))
-
-                coeff_grad = coeff_grad.sum(axis=1) # pytorch expects total gradient not per-sample gradient
-
-                if logdet_sens is not None:
-                    grad2 = torch.zeros(f.numCoeffs, input.shape[1], dtype=torch.double)
-
-                    f.LogDeterminantCoeffGradImpl(ExtractTorchTensorData(input_dbl), 
-                                                 ExtractTorchTensorData(grad2))
-                    
-                    coeff_grad += torch.sum(grad2*logdet_sens[None,:],axis=1)
-                    
-        if coeff_grad is not None:
-            coeff_grad = coeff_grad.type(input.dtype)
-        
-        if grad is not None:
-            grad = grad.type(input.dtype)
-        
-        return grad, coeff_grad, None, None  
+from .torch_helpers import ExtractTorchTensorData, MpartTorchAutograd
         
 
 
@@ -118,7 +10,7 @@ class TorchParameterizedFunctionBase(torch.nn.Module):
         can be used with pytorch.  
     """
 
-    def __init__(self, f, store_coeffs=True, dtype=torch.double):
+    def __init__(self, f=None, store_coeffs=True, dtype=torch.double):
         super().__init__()
 
         self.f = f 
@@ -129,7 +21,7 @@ def __init__(self, f, store_coeffs=True, dtype=torch.double):
             self.coeffs = torch.nn.Parameter(coeff_tensor)
         else:
             self.coeffs = None 
-
+    
     def forward(self, x, coeffs=None):
         
         if coeffs is None:
@@ -148,7 +40,7 @@ class TorchConditionalMapBase(torch.nn.Module):
         This can be done either in the constructor or afterwards.
     """
 
-    def __init__(self, f, store_coeffs=True, return_logdet=False, dtype=torch.double):
+    def __init__(self, f=None, store_coeffs=True, return_logdet=False, dtype=torch.double):
         super().__init__()
 
         self.return_logdet = return_logdet
@@ -159,7 +51,7 @@ def __init__(self, f, store_coeffs=True, return_logdet=False, dtype=torch.double
             self.coeffs = torch.nn.Parameter(coeff_tensor)
         else:
             self.coeffs = None 
-
+    
     def forward(self, x, coeffs=None):
         
         if coeffs is None:
diff --git a/bindings/python/package/torch_helpers.py b/bindings/python/package/torch_helpers.py
new file mode 100644
index 00000000..8fe5cfbf
--- /dev/null
+++ b/bindings/python/package/torch_helpers.py
@@ -0,0 +1,115 @@
+import torch
+
+def ExtractTorchTensorData(tensor):
+    """ Extracts the pointer, shape, and stride from a pytorch tensor and returns a tuple
+        that can be passed to MParT functions that have been overloaded to accept
+        (double*, std::tuple<int,int>, std::tuple<int,int>) instead of a Kokkos::View.
+
+        Arguments:
+        ------------
+        tensor: pytorch.Tensor
+            The pytorch tensor we want to eventually wrap with a Kokkos view.
+        
+        Returns:
+        ------------
+        Tuple[int, Tuple[int,int], Tuple[int,int]]
+            A python tuple that contains all information needed to construct a Kokkos::View.
+            After casting to c++ types using pybind, this output can be passed to the 
+            mpart::ConstructViewFromPointer function.
+    """
+
+    # Make sure the tensor has double data type
+    if tensor.dtype != torch.float64:
+        raise ValueError(f'Currently only tensors with float64 datatype can be converted.  Current dtype is {tensor.dtype}')
+    
+    if len(tensor.shape)==1:
+        return tensor.data_ptr(), tensor.shape[0], tensor.stride()[0]
+    elif len(tensor.shape)==2:
+        return tensor.data_ptr(), tuple(tensor.shape), tuple(tensor.stride())
+    else:
+        raise ValueError(f'Currently only 1d and 2d tensors can be converted.')
+
+
+class MpartTorchAutograd(torch.autograd.Function):
+
+    def __reduce__(self):
+        return (self.__class__, (None,))
+    
+    @staticmethod
+    def forward(ctx, input, coeffs, f, return_logdet):
+        ctx.save_for_backward(input, coeffs)
+        ctx.f = f
+
+        coeffs_dbl = None 
+        if coeffs is not None:
+            coeffs_dbl = coeffs.double()
+            f.WrapCoeffs(ExtractTorchTensorData(coeffs_dbl))
+        input_dbl = input.double()
+
+        output = torch.zeros(f.outputDim, input.shape[1], dtype=torch.double) 
+        f.EvaluateImpl(ExtractTorchTensorData(input_dbl), ExtractTorchTensorData(output))
+
+        if return_logdet:
+            logdet = torch.zeros(input.shape[1], dtype=torch.double)
+            f.LogDeterminantImpl(ExtractTorchTensorData(input_dbl), ExtractTorchTensorData(logdet))
+            return output.type(input.dtype), logdet.type(input.dtype)
+        else:
+            return output.type(input.dtype)
+
+    @staticmethod
+    def backward(ctx, output_sens, logdet_sens=None):
+        input, coeffs = ctx.saved_tensors
+        f = ctx.f
+        
+        coeffs_dbl = None 
+        if coeffs is not None:
+            coeffs_dbl = coeffs.double()
+            f.WrapCoeffs(ExtractTorchTensorData(coeffs_dbl))
+        input_dbl = input.double()
+        output_sens_dbl = output_sens.double()
+
+        logdet_sens_dbl = None 
+        if logdet_sens is not None:
+            logdet_sens_dbl = logdet_sens.double()
+
+        # Get the gradient wrt input 
+        grad = None 
+        if input.requires_grad:          
+            grad = torch.zeros(f.inputDim, input.shape[1], dtype=torch.double)
+
+            f.GradientImpl(ExtractTorchTensorData(input_dbl), 
+                          ExtractTorchTensorData(output_sens_dbl),
+                          ExtractTorchTensorData(grad))
+
+            if logdet_sens is not None:
+                grad2 = torch.zeros(f.inputDim, input.shape[1], dtype=torch.double)
+
+                f.LogDeterminantInputGradImpl(ExtractTorchTensorData(input_dbl), 
+                                              ExtractTorchTensorData(grad2))
+                grad += grad2*logdet_sens_dbl[None,:]
+
+        coeff_grad = None
+        if coeffs is not None:
+            if coeffs.requires_grad:
+                coeff_grad = torch.zeros(f.numCoeffs, input.shape[1], dtype=torch.double)
+                f.CoeffGradImpl(ExtractTorchTensorData(input_dbl),
+                                ExtractTorchTensorData(output_sens_dbl),
+                                ExtractTorchTensorData(coeff_grad))
+
+                coeff_grad = coeff_grad.sum(axis=1) # pytorch expects total gradient not per-sample gradient
+
+                if logdet_sens is not None:
+                    grad2 = torch.zeros(f.numCoeffs, input.shape[1], dtype=torch.double)
+
+                    f.LogDeterminantCoeffGradImpl(ExtractTorchTensorData(input_dbl), 
+                                                 ExtractTorchTensorData(grad2))
+                    
+                    coeff_grad += torch.sum(grad2*logdet_sens[None,:],axis=1)
+                    
+        if coeff_grad is not None:
+            coeff_grad = coeff_grad.type(input.dtype)
+        
+        if grad is not None:
+            grad = grad.type(input.dtype)
+        
+        return grad, coeff_grad, None, None  
\ No newline at end of file
diff --git a/bindings/python/tests/test_TorchWrapper.py b/bindings/python/tests/test_TorchWrapper.py
index 70dbc4d5..5c28a1a8 100644
--- a/bindings/python/tests/test_TorchWrapper.py
+++ b/bindings/python/tests/test_TorchWrapper.py
@@ -7,6 +7,7 @@
 
 import mpart as mt 
 import numpy as np
+import dill 
 
 if haveTorch:
 
@@ -81,6 +82,21 @@ def test_AutogradCoeffs():
         loss.backward()
         assert tmap2.coeffs.grad is not None
     
+    def test_AutogradCoeffAsInput():
+
+        opts = mt.MapOptions()
+        tmap = mt.CreateTriangular(dim,dim,3,opts) # Simple third order map
+
+        tmap2 = tmap.torch(store_coeffs=False)
+
+        x = torch.randn(numSamps, dim, dtype=torch.double)
+        coeffs = torch.randn(tmap.numCoeffs, dtype=torch.double)
+
+        y = tmap2(x,coeffs)
+        assert y.shape[0] == numSamps
+        assert y.shape[1] == dim 
+        assert not y.isnan().any()
+
     def test_TorchMethod():
         opts = mt.MapOptions()
         tmap = mt.CreateTriangular(dim,dim,3,opts) # Simple third order map
@@ -96,20 +112,20 @@ def test_TorchMethod():
         assert np.all(y.detach().numpy() == tmap.Evaluate(x.T.detach().numpy()).T)
         assert np.all(logdet.detach().numpy() == tmap.LogDeterminant(x.T.detach().numpy()))
 
-    def test_AutogradCoeffAsInput():
-
+    def test_TorchPickle():
         opts = mt.MapOptions()
         tmap = mt.CreateTriangular(dim,dim,3,opts) # Simple third order map
 
-        tmap2 = tmap.torch(store_coeffs=False)
-
         x = torch.randn(numSamps, dim, dtype=torch.double)
-        coeffs = torch.randn(tmap.numCoeffs, dtype=torch.double)
+        tmap2 = tmap.torch(store_coeffs=True)
+        y = tmap2.forward(x)
+        
 
-        y = tmap2(x,coeffs)
-        assert y.shape[0] == numSamps
-        assert y.shape[1] == dim 
-        assert not y.isnan().any()
+        map_bytes = dill.dumps(tmap2, dill.HIGHEST_PROTOCOL)
+        tmap3 = dill.loads(map_bytes)
+
+        y2 = tmap3.forward(x)
+        assert (y2-y).abs().max() < 1e-8
 
 
 if __name__=='__main__':
@@ -118,4 +134,5 @@ def test_AutogradCoeffAsInput():
     test_Autograd()
     test_AutogradCoeffs()
     test_TorchMethod()
+    test_TorchPickle()
     test_AutogradCoeffAsInput()
\ No newline at end of file

From d035eee2879565a6457c736198ab1720a48c5fca Mon Sep 17 00:00:00 2001
From: Matthew Parno <mparno@solea.com>
Date: Tue, 27 Feb 2024 09:13:59 -0500
Subject: [PATCH 2/8] Bumped version and added dill dependency so tests can
 run.

---
 .docker/environment.yml          | 1 +
 .github/environment.yml          | 2 +-
 bindings/python/package/torch.py | 3 ---
 pyproject.toml                   | 2 +-
 4 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/.docker/environment.yml b/.docker/environment.yml
index 9858ab5e..e470641a 100644
--- a/.docker/environment.yml
+++ b/.docker/environment.yml
@@ -12,3 +12,4 @@ dependencies:
   - gcc
   - gxx
   - make
+  - dill
diff --git a/.github/environment.yml b/.github/environment.yml
index 4864ac20..5bf2b14c 100644
--- a/.github/environment.yml
+++ b/.github/environment.yml
@@ -13,4 +13,4 @@ dependencies:
   - cereal >= 1.3
   - nlopt >= 2.7
   - pytorch
-
+  - dill
diff --git a/bindings/python/package/torch.py b/bindings/python/package/torch.py
index d2a1334e..6882c618 100644
--- a/bindings/python/package/torch.py
+++ b/bindings/python/package/torch.py
@@ -1,10 +1,7 @@
 import torch
-import dill 
-
 from .torch_helpers import ExtractTorchTensorData, MpartTorchAutograd
         
 
-
 class TorchParameterizedFunctionBase(torch.nn.Module):
     """ Defines a wrapper around the MParT ParameterizedFunctionBase class that
         can be used with pytorch.  
diff --git a/pyproject.toml b/pyproject.toml
index 123404c0..56a8c3ad 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,7 @@ license={file="LICENSE.txt"}
 readme="README.md"
 requires-python = ">=3.7"
 description="A Monotone Parameterization Toolkit"
-version="2.2.1"
+version="2.2.2"
 keywords=["Measure Transport", "Monotone", "Transport Map", "Isotonic Regression", "Triangular", "Knothe-Rosenblatt"]
 
 [project.urls]

From 0b5e6e39e260f45a0a647e4ebafd53b1f5d4e68f Mon Sep 17 00:00:00 2001
From: Matthew Parno <mparno@solea.com>
Date: Tue, 27 Feb 2024 09:16:38 -0500
Subject: [PATCH 3/8] Added release branch to github workflows.

---
 .github/workflows/build-bindings.yml           | 1 +
 .github/workflows/build-doc.yml                | 1 +
 .github/workflows/build-external-lib-tests.yml | 1 +
 .github/workflows/build-push-docker.yml        | 1 +
 .github/workflows/build-tests.yml              | 1 +
 5 files changed, 5 insertions(+)

diff --git a/.github/workflows/build-bindings.yml b/.github/workflows/build-bindings.yml
index 0f029c4d..4856f39c 100644
--- a/.github/workflows/build-bindings.yml
+++ b/.github/workflows/build-bindings.yml
@@ -3,6 +3,7 @@ name: binding-tests
 on:
   push:
     branches:
+      - release
       - main
   pull_request: {}
 
diff --git a/.github/workflows/build-doc.yml b/.github/workflows/build-doc.yml
index 1214797c..17da50c5 100644
--- a/.github/workflows/build-doc.yml
+++ b/.github/workflows/build-doc.yml
@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - main
+      - release
 
 jobs:
   build-docs:
diff --git a/.github/workflows/build-external-lib-tests.yml b/.github/workflows/build-external-lib-tests.yml
index c8cbb73c..bd13ae51 100644
--- a/.github/workflows/build-external-lib-tests.yml
+++ b/.github/workflows/build-external-lib-tests.yml
@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - main
+      - release
   pull_request: {}
 
 env:
diff --git a/.github/workflows/build-push-docker.yml b/.github/workflows/build-push-docker.yml
index 2c784397..336a28b7 100644
--- a/.github/workflows/build-push-docker.yml
+++ b/.github/workflows/build-push-docker.yml
@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - main
+      - release
 
 jobs:
   docker:
diff --git a/.github/workflows/build-tests.yml b/.github/workflows/build-tests.yml
index 8fabb02a..2ba6d1bf 100644
--- a/.github/workflows/build-tests.yml
+++ b/.github/workflows/build-tests.yml
@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - main
+      - release
   pull_request: {}
 
 jobs:

From c93aa58c762dd00ef0a7deb4c50074289e323aae Mon Sep 17 00:00:00 2001
From: Matthew Parno <mparno@solea.com>
Date: Tue, 27 Feb 2024 09:36:54 -0500
Subject: [PATCH 4/8] Added missing include.

---
 tests/Distributions/Test_SampleGenerator.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/Distributions/Test_SampleGenerator.cpp b/tests/Distributions/Test_SampleGenerator.cpp
index 36931920..58abf895 100644
--- a/tests/Distributions/Test_SampleGenerator.cpp
+++ b/tests/Distributions/Test_SampleGenerator.cpp
@@ -1,5 +1,8 @@
 #include "Test_Distributions_Common.h"
 
+#include <numeric>
+#include <algorithm>
+
 TEST_CASE( "Testing SampleGenerator", "[SampleGenerator]") {
 // Sample 1000 points
 // Check empirical CDF against uniform CDF

From 010513a97ba1c92f434d58c46f557fd43c16bfae Mon Sep 17 00:00:00 2001
From: Daniel <43151183+dannys4@users.noreply.github.com>
Date: Thu, 21 Mar 2024 13:38:01 -0400
Subject: [PATCH 5/8] Fixing CxxWrap version for CI (#396)

---
 .github/workflows/build-bindings.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-bindings.yml b/.github/workflows/build-bindings.yml
index 6a1b422a..1757b83a 100644
--- a/.github/workflows/build-bindings.yml
+++ b/.github/workflows/build-bindings.yml
@@ -47,7 +47,7 @@ jobs:
 
       - name: Setup Julia
         run: |
-          julia -e "using Pkg; Pkg.add([\"CxxWrap\",\"TestReports\"])"
+          julia -e "using Pkg; Pkg.add([Pkg.PackageSpec(;name=\"CxxWrap\",version=v\"0.14.2\"),Pkg.PackageSpec(;name=\"TestReports\")])"
           export GITHUB_JULIA_PATH=$(julia -e "println(DEPOT_PATH[1])")
           echo -n $'[bee5971c-294f-5168-9fcd-9fb3c811d495]\nMParT = \"' >> $GITHUB_JULIA_PATH/artifacts/Overrides.toml
           echo -n $GITHUB_WORKSPACE                                     >> $GITHUB_JULIA_PATH/artifacts/Overrides.toml

From de521c1402cc10f02d775a4496a93ad6c9e2ca24 Mon Sep 17 00:00:00 2001
From: Daniel <43151183+dannys4@users.noreply.github.com>
Date: Mon, 25 Mar 2024 12:25:14 -0400
Subject: [PATCH 6/8] Re-adding GPU Operation (#391)

* Fix SetCoeffs

* fixed ParameterizedFunctionBase bugs

* added missing templates

* fixed SetCoeff bugs

* fixed wrapCoeffs

* implemented wrapCoeffs with shallow copy

* used correct WrapCoeffs function for eigen vector coeffs

* fixed gpu bug in ConditionalMapBase

* conditional map base gpu bug fix

* removed unnecissary function

* ConditionalMapBase gpu bug fix (#373)

* fixed gpu bug in ConditionalMapBase

* conditional map base gpu bug fix

* removed unnecissary function

* fixed wrap coeffs gpu error in ComposedMap

* working on composedmap setCoeffs bug

* Merge changes from main (#379)

* fixed Summarized Map GPU bug

* fixed MapFactory GPU bugs

* Standardized notation

* ComposedMap, SummarizedMap, and MapFactory GPU bug fixes (#382)

* MapFactoryImpls compiling

* addec correct space when using gpu

* main library compiles with gpu

* Fixing some small gpu issues

* MapFactoryImpl Fixes (#387)

* GPU fully compiling (#389)

* Pass tests on gpu (#390)

* removed comments

* added functor

* fixed functor

* Fixing MapObjective

* added non const set coeffs function

* fixed non kokkos inline function error

* Fixing sigmoid.h and small LA

* working on tohost

* Fix viewtype tohost

* added documentation for array conversions

* removed documentation for array conversions

* fixed commenting bug

* moved comments

* updated setcoeffs and documentation

* fixed julia binidngs error

* removed virtual from non virtual setCoeffs

* fixed julia binding error

* fixed julia binding error part2

* fixed undefined device space

* renamed device space tests

* Adding new policy fixes

* started fixing kokkos parallel for issues

* continued updating parallel for

* updating parallel_for in tests

* updated parallel reduce

* Fix warnings, format

* Fix density errors

* Compiling RunTests

* Working tests

* Fix serial codebase

* Compiling Serialization and python

* Fixing mapfactoryimpl warnings

* Working python bindings

* fixing git issues

* fixed binding templating

* Finish gpu compile (#394)

* Fix loop in innermarginalaffine

* Remove unused variables

* Fix GPU and Test warnings/errors

* Test sigmoids on GPU

* Working python bindings for device sigmoid component

* Fix CPU compile error

* Fix test_MC

* fixed Multivariate Test

---------

Co-authored-by: Daniel Sharp <dannys4@mit.edu>
Co-authored-by: MMRROO <mrowkamaad@gmail.com>
Co-authored-by: Matt Parno <parnomd@gmail.com>
---
 MParT/ComposedMap.h                           |   8 +-
 MParT/Distributions/DensityBase.h             |  14 +-
 MParT/Distributions/PushforwardDensity.h      |   5 +-
 MParT/MapFactory.h                            |  19 +-
 MParT/MonotoneComponent.h                     |  10 +-
 MParT/MonotoneIntegrand.h                     |   2 +-
 MParT/MultivariateExpansionWorker.h           |   2 -
 MParT/ParameterizedFunctionBase.h             |  14 +-
 MParT/Quadrature.h                            |  22 +-
 MParT/RectifiedMultivariateExpansion.h        |  20 +-
 MParT/Sigmoid.h                               |  29 +--
 MParT/SummarizedMap.h                         |   9 +-
 MParT/UnivariateExpansion.h                   |   4 +-
 MParT/Utilities/ArrayConversions.h            |  38 +--
 MParT/Utilities/LinearAlgebra.h               |   5 +-
 MParT/Utilities/Miscellaneous.h               |  25 +-
 MParT/Utilities/RootFinding.h                 |   1 -
 MParT/Utilities/Serialization.h               |  17 +-
 .../julia/src/ParameterizedFunctionBase.cpp   |   7 +-
 bindings/python/src/CommonPybindUtilities.cpp |   2 +-
 bindings/python/src/ConditionalMapBase.cpp    |   6 +-
 bindings/python/src/MapObjective.cpp          |   6 +-
 .../python/src/ParameterizedFunctionBase.cpp  |   2 +-
 bindings/python/src/Serialization.cpp         |  13 +-
 bindings/python/src/TrainMapAdaptive.cpp      |   3 +-
 bindings/python/src/TriangularMap.cpp         |  10 +-
 bindings/python/src/Wrapper.cpp               |   5 +-
 src/AffineMap.cpp                             |   3 +-
 src/ComposedMap.cpp                           |  33 ++-
 src/ConditionalMapBase.cpp                    |   7 +-
 src/Distributions/DensityBase.cpp             |  95 +++++--
 src/Distributions/GaussianSamplerDensity.cpp  |  27 +-
 src/InnerMarginalAffineMap.cpp                |  28 +-
 src/MapFactory.cpp                            |  46 ++--
 src/MapFactoryImpl1.cpp                       |   7 +-
 src/MapFactoryImpl10.cpp                      |   8 +-
 src/MapFactoryImpl11.cpp                      |   8 +-
 src/MapFactoryImpl12.cpp                      |   6 +-
 src/MapFactoryImpl13.cpp                      |   8 +-
 src/MapFactoryImpl14.cpp                      |   8 +-
 src/MapFactoryImpl15.cpp                      |   8 +-
 src/MapFactoryImpl16.cpp                      |  12 +-
 src/MapFactoryImpl17.cpp                      |  12 +-
 src/MapFactoryImpl18.cpp                      |  12 +-
 src/MapFactoryImpl2.cpp                       |   7 +-
 src/MapFactoryImpl3.cpp                       |   8 +-
 src/MapFactoryImpl4.cpp                       |   8 +-
 src/MapFactoryImpl5.cpp                       |   8 +-
 src/MapFactoryImpl6.cpp                       |   8 +-
 src/MapFactoryImpl7.cpp                       |   8 +-
 src/MapFactoryImpl8.cpp                       |   8 +-
 src/MapFactoryImpl9.cpp                       |   8 +-
 src/MapObjective.cpp                          |  43 ++--
 src/MultiIndices/FixedMultiIndexSet.cpp       |  41 +--
 src/ParameterizedFunctionBase.cpp             |  79 +++---
 src/SummarizedMap.cpp                         |  25 +-
 src/TrainMap.cpp                              |   8 +-
 src/Utilities/LinearAlgebra.cpp               |   8 +-
 tests/Distributions/Test_DensityBase.cpp      |   5 +-
 .../Distributions/Test_Distributions_Common.h |   8 +-
 .../Test_GaussianDistribution.cpp             |  14 +-
 tests/Distributions/Test_TransportDensity.cpp |   6 +-
 tests/Distributions/Test_TransportSampler.cpp |   6 +-
 tests/MultiIndices/Test_MultiIndexSet.cpp     |  27 +-
 tests/Test_BasisEvaluator.cpp                 |   8 +-
 tests/Test_InnerMarginalAffineMap.cpp         |   1 -
 tests/Test_LinearAlgebra.cpp                  |  42 +--
 tests/Test_MapFactory.cpp                     |   8 +-
 tests/Test_MapObjective.cpp                   |   4 +-
 tests/Test_MonotoneComponent.cpp              |  25 +-
 tests/Test_MultivariateExpansion.cpp          |   3 +-
 tests/Test_MultivariateExpansionWorker.cpp    |   5 +-
 tests/Test_OrthogonalPolynomials.cpp          |   6 +-
 tests/Test_PositiveBijectors.cpp              |  10 +-
 tests/Test_Quadrature.cpp                     |   3 -
 tests/Test_RectifiedMultivariateExpansion.cpp |   1 -
 tests/Test_RootFinding.cpp                    |   2 +-
 tests/Test_Serialization.cpp                  |   1 -
 tests/Test_Sigmoid.cpp                        | 240 +++++++++++-------
 tests/Test_SummarizedMap.cpp                  |   1 -
 tests/Test_TrainMap.cpp                       |   9 +-
 tests/Test_TrainMapAdaptive.cpp               |   4 +-
 tests/Test_TriangularMap.cpp                  |   4 -
 tests/Test_UnivariateExpansion.cpp            |   1 -
 84 files changed, 773 insertions(+), 564 deletions(-)

diff --git a/MParT/ComposedMap.h b/MParT/ComposedMap.h
index c51d6160..0b9f55de 100644
--- a/MParT/ComposedMap.h
+++ b/MParT/ComposedMap.h
@@ -50,12 +50,12 @@ class ComposedMap : public ConditionalMapBase<MemorySpace>
                   start at index \f$\sum_{j=1}^{k-1} C_j\f$.
     */
     using ConditionalMapBase<MemorySpace>::SetCoeffs;
-    void SetCoeffs(Kokkos::View<const double*, Kokkos::HostSpace> coeffs) override;
-    void WrapCoeffs(Kokkos::View<double*, Kokkos::HostSpace> coeffs) override;
+    using ConditionalMapBase<MemorySpace>::WrapCoeffs;
+    void SetCoeffs(Kokkos::View<const double*, MemorySpace> coeffs) override;
+    void WrapCoeffs(Kokkos::View<double*, MemorySpace> coeffs) override;
 
     #if defined(MPART_ENABLE_GPU)
-    void SetCoeffs(Kokkos::View<const double*, Kokkos::DefaultExecutionSpace::memory_space> coeffs) override;
-    void WrapCoeffs(Kokkos::View<double*, mpart::DeviceSpace> coeffs) override;
+    void SetCoeffs(Kokkos::View<const double*, std::conditional_t<std::is_same_v<MemorySpace, Kokkos::HostSpace>, mpart::DeviceSpace, Kokkos::HostSpace>> coeffs) override;
     #endif
 
     virtual std::shared_ptr<ConditionalMapBase<MemorySpace>> GetComponent(unsigned int i){ return maps_.at(i);}
diff --git a/MParT/Distributions/DensityBase.h b/MParT/Distributions/DensityBase.h
index 5ab27dfb..050412d1 100644
--- a/MParT/Distributions/DensityBase.h
+++ b/MParT/Distributions/DensityBase.h
@@ -47,8 +47,10 @@ class DensityBase {
      * @param X The points where we want to evaluate the log density.
      * @return Matrix \f$A\in\mathbb{R}^{m\times n}\f$ containing the log density at each point.
      */
-    template<typename AnyMemorySpace>
-    StridedVector<double, AnyMemorySpace> LogDensity(StridedMatrix<const double, AnyMemorySpace> const &X);
+    StridedVector<double, MemorySpace> LogDensity(StridedMatrix<const double, MemorySpace> const &X);
+    #if defined(MPART_ENABLE_GPU)
+    StridedVector<double, std::conditional_t< std::is_same_v<MemorySpace, Kokkos::HostSpace> , DeviceSpace, Kokkos::HostSpace> > LogDensity(StridedMatrix<const double, std::conditional_t< std::is_same_v<MemorySpace, Kokkos::HostSpace> , DeviceSpace, Kokkos::HostSpace > > const &X);
+    #endif
 
     /**
      * @brief Computes the log density at the given points.
@@ -66,8 +68,10 @@ class DensityBase {
      * @param X The points where we want to evaluate the gradient log density.
      * @return A matrix \f$A\in\mathbb{R}^{m\times n}\f$ containing the gradient of the log density at each point.
      */
-    template<typename AnyMemorySpace>
-    StridedMatrix<double, AnyMemorySpace> LogDensityInputGrad(StridedMatrix<const double, AnyMemorySpace> const &X);
+    StridedMatrix<double, MemorySpace> LogDensityInputGrad(StridedMatrix<const double, MemorySpace> const &X);
+    #if defined(MPART_ENABLE_GPU)
+    StridedMatrix<double, std::conditional_t< std::is_same_v<MemorySpace, Kokkos::HostSpace> , DeviceSpace, Kokkos::HostSpace> > LogDensityInputGrad(StridedMatrix<const double, std::conditional_t< std::is_same_v<MemorySpace, Kokkos::HostSpace> , DeviceSpace, Kokkos::HostSpace > > const &X);
+    #endif
 
     /**
      * @brief Returns the input dimension of the density
@@ -81,4 +85,4 @@ class DensityBase {
 };
 }
 
-#endif // MPART_DensityBase_H
\ No newline at end of file
+#endif // MPART_DensityBase_H
diff --git a/MParT/Distributions/PushforwardDensity.h b/MParT/Distributions/PushforwardDensity.h
index 74537bca..9656af37 100644
--- a/MParT/Distributions/PushforwardDensity.h
+++ b/MParT/Distributions/PushforwardDensity.h
@@ -46,7 +46,8 @@ class PushforwardDensity: public DensityBase<MemorySpace> {
         StridedMatrix<double, MemorySpace> mappedPts = map_->Inverse(prefix_null, pts);
         density_->LogDensityImpl(mappedPts, output);
         StridedVector<double, MemorySpace> logJacobian = map_->LogDeterminant(mappedPts);
-        Kokkos::parallel_for("Subtract logJac", output.extent(0), KOKKOS_LAMBDA(const unsigned int i){
+	Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy{0lu, output.extent(0)};
+        Kokkos::parallel_for("Subtract logJac", policy, KOKKOS_LAMBDA(const unsigned int i){
             output(i) -= logJacobian(i);
         });
     };
@@ -71,4 +72,4 @@ class PushforwardDensity: public DensityBase<MemorySpace> {
 
 } // namespace mpart
 
-#endif //MPART_PUSHFORWARDDENSITY_H
\ No newline at end of file
+#endif //MPART_PUSHFORWARDDENSITY_H
diff --git a/MParT/MapFactory.h b/MParT/MapFactory.h
index d02ec57a..b9ef2f38 100644
--- a/MParT/MapFactory.h
+++ b/MParT/MapFactory.h
@@ -140,12 +140,13 @@ namespace mpart{
             unsigned int inputDim, unsigned int totalOrder, StridedVector<const double, MemorySpace> centers,
             MapOptions opts);
 
-        template<typename MemorySpace, std::enable_if_t<std::is_same_v<MemorySpace, Kokkos::HostSpace>, bool> = true>
-        std::shared_ptr<ConditionalMapBase<Kokkos::HostSpace>> CreateSigmoidComponent(
+        template<typename MemorySpace>
+        std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateSigmoidComponent(
             unsigned int inputDim, unsigned int totalOrder, Eigen::Ref<const Eigen::RowVectorXd> centers,
             MapOptions opts) {
             StridedVector<const double, Kokkos::HostSpace> centersVec = ConstVecToKokkos<double, Kokkos::HostSpace>(centers);
-            return CreateSigmoidComponent<Kokkos::HostSpace>(inputDim, totalOrder, centersVec, opts);
+            Kokkos::View<const double*, MemorySpace> centers_d = Kokkos::create_mirror_view_and_copy(MemorySpace(), centersVec);
+            return CreateSigmoidComponent<MemorySpace>(inputDim, totalOrder, centers_d, opts);
         }
 
         template<typename MemorySpace>
@@ -153,12 +154,13 @@ namespace mpart{
             FixedMultiIndexSet<MemorySpace> mset_offdiag, FixedMultiIndexSet<MemorySpace> mset_diag,
             StridedVector<const double, MemorySpace> centers, MapOptions opts);
 
-        template<typename MemorySpace, std::enable_if_t<std::is_same_v<MemorySpace, Kokkos::HostSpace>, bool> = true>
+        template<typename MemorySpace>
         std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateSigmoidComponent(
             FixedMultiIndexSet<MemorySpace> mset_offdiag, FixedMultiIndexSet<MemorySpace> mset_diag,
             Eigen::Ref<const Eigen::RowVectorXd> centers, MapOptions opts) {
             StridedVector<const double, Kokkos::HostSpace> centersVec = ConstVecToKokkos<double, Kokkos::HostSpace>(centers);
-            return CreateSigmoidComponent<Kokkos::HostSpace>(mset_offdiag, mset_diag, centersVec, opts);
+            Kokkos::View<const double*, MemorySpace> centers_d = Kokkos::create_mirror_view_and_copy(MemorySpace(), centersVec);
+            return CreateSigmoidComponent<MemorySpace>(mset_offdiag, mset_diag, centers_d, opts);
         }
 
         template<typename MemorySpace>
@@ -180,13 +182,14 @@ namespace mpart{
             return CreateSigmoidTriangular<MemorySpace>(inputDim, outputDim, totalOrder, centersVecs, opts);
         }
 
-        template<typename MemorySpace, std::enable_if_t<std::is_same_v<MemorySpace, Kokkos::HostSpace>, bool> = true>
-        std::shared_ptr<ConditionalMapBase<Kokkos::HostSpace>> CreateSigmoidTriangular(
+        template<typename MemorySpace>
+        std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateSigmoidTriangular(
             unsigned int inputDim, unsigned int outputDim, unsigned int totalOrder,
             Eigen::Ref<const Eigen::RowMatrixXd> const& centers, MapOptions opts
         ) {
             StridedMatrix<const double, Kokkos::HostSpace> centersMat = ConstRowMatToKokkos<double,Kokkos::HostSpace>(centers);
-            return CreateSigmoidTriangular<Kokkos::HostSpace>(inputDim, outputDim, totalOrder, centersMat, opts);
+            Kokkos::View<const double**, MemorySpace> centers_d = Kokkos::create_mirror_view_and_copy(MemorySpace(), centersMat);
+            return CreateSigmoidTriangular<MemorySpace>(inputDim, outputDim, totalOrder, centers_d, opts);
         }
 
         /** This struct is used to map the options to functions that can create a map component with types corresponding
diff --git a/MParT/MonotoneComponent.h b/MParT/MonotoneComponent.h
index bbdfd2f0..65097100 100644
--- a/MParT/MonotoneComponent.h
+++ b/MParT/MonotoneComponent.h
@@ -141,7 +141,7 @@ class MonotoneComponent : public ConditionalMapBase<MemorySpace>
             "sens: (" << sensRows << "," << sensCols << "), expected: " << this->outputDim << ", " << ptsCols << "), "
             << "pts: (" << ptsRows << "," << ptsCols << "), expected: (" << this->inputDim << "," << ptsCols << "), "
             << "output: (" << outputRows << "," << outputCols << "), expected: (" << expectedOutputRows << "," << ptsCols << ")";
-            ProcAgnosticError<MemorySpace,std::invalid_argument>::error(ss.str().c_str());
+            ProcAgnosticError<std::invalid_argument>(ss.str().c_str());
         }
     }
 
@@ -262,7 +262,7 @@ class MonotoneComponent : public ConditionalMapBase<MemorySpace>
             std::stringstream ss;
             ss << "EvaluateImpl: output has incorrect number of columns. "
             << "Expected: " << pts.extent(1) << ", got " << output.extent(0);
-            ProcAgnosticError<MemorySpace,std::invalid_argument>::error(ss.str().c_str());
+            ProcAgnosticError<std::invalid_argument>(ss.str().c_str());
         }
 
         // Ask the expansion how much memory it would like for its one-point cache
@@ -663,7 +663,7 @@ class MonotoneComponent : public ConditionalMapBase<MemorySpace>
                << "jacobian: (" << jacRows << "," << jacCols << "), expected: (" << expectJacRows << "," << expectJacCols << "), ";
             if(expectEvalRows > 0)
                 ss << "evaluations: (" << evalRows << "), expected: (" << expectEvalRows << ")";
-            ProcAgnosticError<MemorySpace,std::invalid_argument>::error(ss.str().c_str());
+            ProcAgnosticError<std::invalid_argument>(ss.str().c_str());
         }
     }
 
@@ -1082,9 +1082,9 @@ class MonotoneComponent : public ConditionalMapBase<MemorySpace>
         ExpansionType expansion;
         double nugget;
 
-        SingleEvaluator(double* workspace_, double* cache_, PointType pt_, CoeffType coeffs_, QuadratureType quad_, ExpansionType expansion_, double nugget_):
+        KOKKOS_FUNCTION SingleEvaluator(double* workspace_, double* cache_, PointType pt_, CoeffType coeffs_, QuadratureType quad_, ExpansionType expansion_, double nugget_):
             workspace(workspace_), cache(cache_), pt(pt_), coeffs(coeffs_), quad(quad_), expansion(expansion_), nugget(nugget_) {};
-        double operator()(double x) {
+        KOKKOS_INLINE_FUNCTION double operator()(double x) {
             return EvaluateSingle(cache, workspace, pt, x, coeffs, quad, expansion, nugget);
         }
     };
diff --git a/MParT/MonotoneIntegrand.h b/MParT/MonotoneIntegrand.h
index cf382b83..c3481cf8 100644
--- a/MParT/MonotoneIntegrand.h
+++ b/MParT/MonotoneIntegrand.h
@@ -183,7 +183,7 @@ class MonotoneIntegrand{
         // Check for infs or nans
         if(std::isinf(gf)){
             if(failOnNaN) {
-                ProcAgnosticError<MemorySpace,std::runtime_error>::error("MonotoneIntegrand: nan was encountered in value of g(df(...)). Use MonotoneIntegrand::setFailOnNaN for enabling NaN propagation.");
+                ProcAgnosticError<std::runtime_error>("MonotoneIntegrand: nan was encountered in value of g(df(...)). Use MonotoneIntegrand::setFailOnNaN for enabling NaN propagation.");
             } else {
                 printf("\nERROR: In MonotoneIntegrand, value of g(df(...)) is inf.  The value of df(...) is %0.4f, and the value of f(df(...)) is %0.4f.\n\n", df, gf);
             }
diff --git a/MParT/MultivariateExpansionWorker.h b/MParT/MultivariateExpansionWorker.h
index 80527250..2df29d86 100644
--- a/MParT/MultivariateExpansionWorker.h
+++ b/MParT/MultivariateExpansionWorker.h
@@ -339,7 +339,6 @@ class MultivariateExpansionWorker
     {
         const unsigned int numTerms = multiSet_.Size();
         double df = 0.0;
-        unsigned int posInd;
 
         for(int wrt=-1; wrt<int(dim_); ++wrt){
             if(wrt>=0){
@@ -533,7 +532,6 @@ class MultivariateExpansionWorker
     KOKKOS_FUNCTION double GetTermValMixedCoeffDeriv(unsigned int termInd, int wrt, const double* cache, unsigned int posIndex) const {
         // Compute the value of this term in the expansion
         double termVal = 1.0;
-        bool hasDeriv = false;
         if(multiSet_.nzStarts(termInd)==multiSet_.nzStarts(termInd+1)) return 0.; // Value is zero if constant in last dimension
         unsigned int end_idx = multiSet_.nzStarts(termInd+1)-1; // last index in loop
         if(multiSet_.nzDims(end_idx)!=dim_-1) return 0.; // Value is zero if constant in last dimension
diff --git a/MParT/ParameterizedFunctionBase.h b/MParT/ParameterizedFunctionBase.h
index 94e14383..c8cbebcc 100644
--- a/MParT/ParameterizedFunctionBase.h
+++ b/MParT/ParameterizedFunctionBase.h
@@ -51,10 +51,8 @@ namespace mpart {
             @param coeffs A view containing the coefficients to copy.
         */
        virtual void SetCoeffs(Kokkos::View<const double*, MemorySpace> coeffs);
+               void SetCoeffs(Kokkos::View<      double*, MemorySpace> coeffs);
 
-       #if defined(MPART_ENABLE_GPU)
-       void SetCoeffs(Kokkos::View<const double*, std::conditional_t<std::is_same_v<MemorySpace,Kokkos::HostSpace>,mpart::DeviceSpace,Kokkos::HostSpace>> coeffs);
-       #endif
 
         /** @brief Wrap the internal coefficient view around another view.
             @details Performs a shallow copy of the input coefficients to the internally stored coefficients.
@@ -62,12 +60,12 @@ namespace mpart {
             internally stored view.
             @param coeffs A view containing the coefficients we want to wrap.
         */
-        virtual void WrapCoeffs(Kokkos::View<double*, Kokkos::HostSpace> coeffs);
+        virtual void WrapCoeffs(Kokkos::View<double*, MemorySpace> coeffs);
 
         #if defined(MPART_ENABLE_GPU)
-        virtual void SetCoeffs(Kokkos::View<const double*, mpart::DeviceSpace> coeffs);
-        virtual void WrapCoeffs(Kokkos::View<double*, mpart::DeviceSpace> coeffs);
-        #endif
+        virtual void SetCoeffs(Kokkos::View<const double*, std::conditional_t<std::is_same_v<Kokkos::HostSpace,MemorySpace>, mpart::DeviceSpace, Kokkos::HostSpace>> coeffs);
+	            void SetCoeffs(Kokkos::View<      double*, std::conditional_t<std::is_same_v<Kokkos::HostSpace,MemorySpace>, mpart::DeviceSpace, Kokkos::HostSpace>> coeffs);
+	    #endif
 
         /** SetCoeffs function with conversion from Eigen to Kokkos vector types.*/
         virtual void SetCoeffs(Eigen::Ref<Eigen::VectorXd> coeffs);
@@ -249,4 +247,4 @@ namespace mpart {
     }; // class ParameterizedFunctionBase
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/MParT/Quadrature.h b/MParT/Quadrature.h
index 81b46fbe..a980de8d 100644
--- a/MParT/Quadrature.h
+++ b/MParT/Quadrature.h
@@ -9,6 +9,8 @@
 
 #include <Eigen/Core>
 
+#include "MParT/Utilities/KokkosSpaceMappings.h"
+
 #if defined(MPART_HAS_CEREAL)
 #include <cereal/access.hpp>
 #include <cereal/types/base_class.hpp>
@@ -353,8 +355,8 @@ struct GetRuleFunctor{
 template<typename MemorySpace>
 inline ClenshawCurtisQuadrature<MemorySpace>::ClenshawCurtisQuadrature(unsigned int numPts, unsigned int maxDim) : QuadratureBase<MemorySpace>(maxDim,maxDim),  pts_("Points", numPts), wts_("Weights", numPts), numPts_(numPts)
 {
-    // TODO: Add parallel for loop here with one thread to make sure rule is filled in the correct space
-    Kokkos::parallel_for(1, GetRuleFunctor<MemorySpace>(numPts, pts_.data(), wts_.data()));
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy {0,1};
+    Kokkos::parallel_for(policy, GetRuleFunctor<MemorySpace>(numPts, pts_.data(), wts_.data()));
 };
 
 template<>
@@ -367,8 +369,8 @@ inline ClenshawCurtisQuadrature<Kokkos::HostSpace>::ClenshawCurtisQuadrature(uns
 template<typename MemorySpace>
 inline ClenshawCurtisQuadrature<MemorySpace>::ClenshawCurtisQuadrature(unsigned int numPts, unsigned int maxDim, double* workspace) : QuadratureBase<MemorySpace>(maxDim,maxDim,workspace),  pts_("Points", numPts), wts_("Weights", numPts), numPts_(numPts)
 {
-    // TODO: Add parallel for loop here with one thread to make sure rule is filled in the correct space
-    Kokkos::parallel_for(1, GetRuleFunctor<MemorySpace>(numPts, pts_.data(), wts_.data()));
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy {0,1};
+    Kokkos::parallel_for(policy, GetRuleFunctor<MemorySpace>(numPts, pts_.data(), wts_.data()));
 };
 
 template<>
@@ -1020,9 +1022,9 @@ inline AdaptiveClenshawCurtis<MemorySpace>::AdaptiveClenshawCurtis(unsigned int
                                                             fineWts_("Coarse Pts", std::pow(2,level+1)+1)
 {
     assert(std::pow(2,level)+1 >=3);
-
-    Kokkos::parallel_for(1, GetRuleFunctor<MemorySpace>(std::pow(2,level)+1, coarsePts_.data(), coarseWts_.data()));
-    Kokkos::parallel_for(1, GetRuleFunctor<MemorySpace>(std::pow(2,level+1)+1, finePts_.data(), fineWts_.data()));
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy {0,1};
+    Kokkos::parallel_for(policy, GetRuleFunctor<MemorySpace>(std::pow(2,level)+1, coarsePts_.data(), coarseWts_.data()));
+    Kokkos::parallel_for(policy, GetRuleFunctor<MemorySpace>(std::pow(2,level+1)+1, finePts_.data(), fineWts_.data()));
 };
 
 template<>
@@ -1060,9 +1062,9 @@ inline AdaptiveClenshawCurtis<MemorySpace>::AdaptiveClenshawCurtis(unsigned int
                                                             fineWts_("Coarse Pts", std::pow(2,level+1)+1)
 {
     assert(std::pow(2,level)+1 >=3);
-
-    Kokkos::parallel_for(1, GetRuleFunctor<MemorySpace>(std::pow(2,level)+1, coarsePts_.data(), coarseWts_.data()));
-    Kokkos::parallel_for(1, GetRuleFunctor<MemorySpace>(std::pow(2,level+1)+1, finePts_.data(), fineWts_.data()));
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy {0, 1};
+    Kokkos::parallel_for(policy, GetRuleFunctor<MemorySpace>(std::pow(2,level)+1, coarsePts_.data(), coarseWts_.data()));
+    Kokkos::parallel_for(policy, GetRuleFunctor<MemorySpace>(std::pow(2,level+1)+1, finePts_.data(), fineWts_.data()));
 };
 
 template<>
diff --git a/MParT/RectifiedMultivariateExpansion.h b/MParT/RectifiedMultivariateExpansion.h
index 83980264..6b369b7b 100644
--- a/MParT/RectifiedMultivariateExpansion.h
+++ b/MParT/RectifiedMultivariateExpansion.h
@@ -72,7 +72,7 @@ namespace mpart{
 
                     // Create a subview containing only the current point
                     auto pt = Kokkos::subview(pts, Kokkos::ALL(), ptInd);
-                    auto pt_off = Kokkos::subview(pt, std::pair<int,int>(0,pt.size()-1));
+                    auto pt_off = Kokkos::subview(pt, Kokkos::pair<int,int>(0,pt.size()-1));
 
                     // Get a pointer to the shared memory that Kokkos set up for this team
                     Kokkos::View<double*,MemorySpace> cache(team_member.thread_scratch(1), cacheSize);
@@ -126,11 +126,11 @@ namespace mpart{
 
                     // Create a subview containing only the current point
                     auto pt = Kokkos::subview(pts, Kokkos::ALL(), ptInd);
-                    auto pt_off = Kokkos::subview(pts, std::pair<int,int>(0,pt.size()-1), ptInd);
+                    auto pt_off = Kokkos::subview(pts, Kokkos::pair<int,int>(0,pt.size()-1), ptInd);
 
                     // Get a pointer to the shared memory that Kokkos set up for this team
-                    Kokkos::View<double*,MemorySpace> cache(team_member.thread_scratch(1), cacheSize);
-                    Kokkos::View<double*,MemorySpace> grad_off(team_member.thread_scratch(1), inDim-1);
+                    Kokkos::View<double*, MemorySpace> cache(team_member.thread_scratch(1), cacheSize);
+                    Kokkos::View<double*, MemorySpace> grad_off(team_member.thread_scratch(1), inDim-1);
                     StridedVector<double, MemorySpace> grad_out = Kokkos::subview(output, Kokkos::ALL(), ptInd);
 
                     // Fill in entries in the cache that are independent of x_d
@@ -188,7 +188,7 @@ namespace mpart{
 
                     // Create a subview containing only the current point
                     auto pt = Kokkos::subview(pts, Kokkos::ALL(), ptInd);
-                    auto pt_off = Kokkos::subview(pt, std::pair<int,int>(0,pt.extent(0)-1));
+                    auto pt_off = Kokkos::subview(pt, Kokkos::pair<int,int>(0,pt.extent(0)-1));
 
                     // Get a pointer to the shared memory that Kokkos set up for this team
                     Kokkos::View<double*,MemorySpace> cache(team_member.thread_scratch(1), cacheSize);
@@ -232,7 +232,7 @@ namespace mpart{
             unsigned int cacheSize = worker_diag.CacheSize();
 
             // Take logdet of diagonal expansion
-            auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy<ExecutionSpace>::member_type team_member) {
+            auto functor = KOKKOS_CLASS_LAMBDA (typename Kokkos::TeamPolicy<ExecutionSpace>::member_type team_member) {
 
                 unsigned int ptInd = team_member.league_rank () * team_member.team_size () + team_member.team_rank ();
 
@@ -333,7 +333,7 @@ namespace mpart{
             unsigned int cacheSize = worker_diag.CacheSize();
 
             // Take logdet of diagonal expansion
-            auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy<ExecutionSpace>::member_type team_member) {
+            auto functor = KOKKOS_CLASS_LAMBDA (typename Kokkos::TeamPolicy<ExecutionSpace>::member_type team_member) {
 
                 unsigned int ptInd = team_member.league_rank () * team_member.team_size () + team_member.team_rank ();
 
@@ -386,7 +386,7 @@ namespace mpart{
             unsigned int cacheSize = worker_diag.CacheSize();
 
             // Take logdet of diagonal expansion
-            auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy<ExecutionSpace>::member_type team_member) {
+            auto functor = KOKKOS_CLASS_LAMBDA (typename Kokkos::TeamPolicy<ExecutionSpace>::member_type team_member) {
 
                 unsigned int ptInd = team_member.league_rank () * team_member.team_size () + team_member.team_rank ();
 
@@ -437,9 +437,9 @@ namespace mpart{
             CoeffType coeffs;
             DiagWorker_T worker;
 
-            SingleWorkerEvaluator(double* cache_, PointType pt_, CoeffType coeffs_, DiagWorker_T worker_):
+            KOKKOS_INLINE_FUNCTION SingleWorkerEvaluator(double* cache_, PointType pt_, CoeffType coeffs_, DiagWorker_T worker_):
                 cache(cache_), pt(pt_), coeffs(coeffs_), worker(worker_) {}
-            double operator()(double x) {
+            KOKKOS_INLINE_FUNCTION double operator()(double x) {
                 worker.FillCache2(cache, pt, x, DerivativeFlags::None);
                 return worker.Evaluate(cache, coeffs);
             }
diff --git a/MParT/Sigmoid.h b/MParT/Sigmoid.h
index 09739d4b..e31ec9a9 100644
--- a/MParT/Sigmoid.h
+++ b/MParT/Sigmoid.h
@@ -109,7 +109,7 @@ class Sigmoid1d {
 			  Kokkos::View<double*, MemorySpace> widths)
 			: centers_(centers), widths_(widths) {
 		Kokkos::View<double*, MemorySpace> weights ("Sigmoid weights", centers.extent(0));
-		Kokkos::parallel_for(centers.extent(0), KOKKOS_LAMBDA(unsigned int i) { weights(i) = 1.; });
+		Kokkos::deep_copy(weights, 1.0);
 		weights_ = weights;
 		Validate();
 	}
@@ -123,12 +123,7 @@ class Sigmoid1d {
 	 */
 	KOKKOS_FUNCTION void EvaluateAll(double* output, int max_order, double input) const {
 		if (order_ < max_order) {
-			std::stringstream ss;
-			ss << "Sigmoid basis evaluation order too large.\n";
-			ss << "Given order " << max_order << ", ";
-			ss << "can only evaluate up to order " << order_;
-			ProcAgnosticError<MemorySpace, std::invalid_argument>::error(
-				ss.str().c_str());
+			ProcAgnosticError<std::invalid_argument>("Sigmoid basis evaluation order too large.");
 		}
 
 		output[0] = 1.;
@@ -167,12 +162,7 @@ class Sigmoid1d {
 	KOKKOS_FUNCTION void EvaluateDerivatives(double* output, double* output_diff, int max_order,
 							 double input) const {
 		if (order_ < max_order) {
-			std::stringstream ss;
-			ss << "Sigmoid basis evaluation order too large.\n";
-			ss << "Given order " << max_order << ", ";
-			ss << "can only evaluate up to order " << order_;
-			ProcAgnosticError<MemorySpace, std::invalid_argument>::error(
-				ss.str().c_str());
+			ProcAgnosticError<std::invalid_argument>("Sigmoid derivative evaluation order too large.");
 		}
 
 		output[0] = 1.;
@@ -222,12 +212,7 @@ class Sigmoid1d {
 								   double* output_diff2, int max_order,
 								   double input) const {
 		if (order_ < max_order) {
-			std::stringstream ss;
-			ss << "Sigmoid basis evaluation order too large.\n";
-			ss << "Given order " << max_order << ", ";
-			ss << "can only evaluate up to order " << order_;
-			ProcAgnosticError<MemorySpace, std::invalid_argument>::error(
-				ss.str().c_str());
+			ProcAgnosticError<std::invalid_argument>("Sigmoid second derivative evaluation order too large");
 		}
 
 		output[0] = 1.;
@@ -295,14 +280,14 @@ class Sigmoid1d {
 			ss << "centers: " << centers_.extent(0) << ", \n";
 			ss << "widths: " << widths_.extent(0) << ",\n";
 			ss << "weights: " << weights_.extent(0) << "\n";
-			ProcAgnosticError<MemorySpace, std::invalid_argument>::error(
+			ProcAgnosticError< std::invalid_argument>(
 				ss.str().c_str());
 		}
 		if (centers_.extent(0) < 2) {
 			std::stringstream ss;
 			ss << "Sigmoid: centers/widths/weights too short.\n";
 			ss << "Length should be of form 2+(1+2+3+...+n) for some order n>=0";
-			ProcAgnosticError<MemorySpace, std::invalid_argument>::error(
+			ProcAgnosticError< std::invalid_argument>(
 				ss.str().c_str());
 		}
 		int n_sigmoid_centers = centers_.extent(0) - 2;
@@ -315,7 +300,7 @@ class Sigmoid1d {
 			std::stringstream ss;
 			ss << "Incorrect length of centers/widths/weights.";
 			ss << "Length should be of form 2+(1+2+3+...+n) for some order n";
-			ProcAgnosticError<MemorySpace, std::invalid_argument>::error(
+			ProcAgnosticError< std::invalid_argument>(
 				ss.str().c_str());
 		}
 		// one added for affine part of this, two added for edge terms
diff --git a/MParT/SummarizedMap.h b/MParT/SummarizedMap.h
index 0f40964f..06ced30f 100644
--- a/MParT/SummarizedMap.h
+++ b/MParT/SummarizedMap.h
@@ -44,11 +44,10 @@ where the function \f$s:\mathbb{R}^{N-1}\rightarrow \mathbb{R}^{r}\f$ is a funct
     virtual ~SummarizedMap() = default;
 
     using ConditionalMapBase<MemorySpace>::SetCoeffs;
-    void SetCoeffs(Kokkos::View<const double*, Kokkos::HostSpace> coeffs) override;
-    void WrapCoeffs(Kokkos::View<double*, Kokkos::HostSpace> coeffs) override;
+    void SetCoeffs(Kokkos::View<const double*, MemorySpace> coeffs) override;
+    void WrapCoeffs(Kokkos::View<double*, MemorySpace> coeffs) override;
     #if defined(MPART_ENABLE_GPU)
-    void SetCoeffs(Kokkos::View<const double*, DeviceSpace> coeffs) override;
-    void WrapCoeffs(Kokkos::View<double*, DeviceSpace> coeffs) override;
+    void SetCoeffs(Kokkos::View<const double*, std::conditional_t<std::is_same_v<Kokkos::HostSpace, MemorySpace>, mpart::DeviceSpace, Kokkos::HostSpace>> coeffs) override;
     #endif
 
     void SummarizePts(StridedMatrix<const double, MemorySpace> const&  pts,
@@ -95,4 +94,4 @@ where the function \f$s:\mathbb{R}^{N-1}\rightarrow \mathbb{R}^{r}\f$ is a funct
 
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/MParT/UnivariateExpansion.h b/MParT/UnivariateExpansion.h
index 5c7c6cd5..c733afcd 100644
--- a/MParT/UnivariateExpansion.h
+++ b/MParT/UnivariateExpansion.h
@@ -279,11 +279,11 @@ class UnivariateExpansion: public ConditionalMapBase<MemorySpace>{
     template<typename CoeffType>
     class SingleUnivariateEvaluator {
         public:
-        SingleUnivariateEvaluator(double* cache, CoeffType coeff, BasisType basis):
+        KOKKOS_FUNCTION SingleUnivariateEvaluator(double* cache, CoeffType coeff, BasisType basis):
             cache_(cache), coeff_(coeff), basisEval_(basis), maxOrderEval_(coeff.extent(0)-1)
         {};
 
-        double operator()(double x) const {
+        KOKKOS_INLINE_FUNCTION double operator()(double x) const {
             basisEval_.EvaluateAll(cache_, maxOrderEval_, x);
             double result = 0.0;
             for(int i = 0; i <= maxOrderEval_; i++) {
diff --git a/MParT/Utilities/ArrayConversions.h b/MParT/Utilities/ArrayConversions.h
index 6205964e..cc9cde94 100644
--- a/MParT/Utilities/ArrayConversions.h
+++ b/MParT/Utilities/ArrayConversions.h
@@ -220,26 +220,6 @@ namespace mpart{
     }
 
 
-    /**
-    @brief Copies a Kokkos array from device memory to host memory
-    @details
-    @param[in] inview A kokkos array in device memory.
-    @return A kokkos array in host memory.  Note that the layout (row-major or col-major) might be different than the default on the Host.  The layout will match the device's default layout.
-    */
-    template<typename DeviceMemoryType, typename ScalarType>
-    typename Kokkos::View<ScalarType,Kokkos::HostSpace>::HostMirror ToHost(Kokkos::View<ScalarType,DeviceMemoryType> const& inview){
-        typename Kokkos::View<ScalarType,DeviceMemoryType>::HostMirror outview = Kokkos::create_mirror_view(inview);
-        Kokkos::deep_copy (outview, inview);
-        return outview;
-    }
-
-    template<typename DeviceMemoryType, typename ScalarType>
-    StridedMatrix<ScalarType, Kokkos::HostSpace> ToHost(StridedMatrix<ScalarType,DeviceMemoryType> const& inview){
-        typename StridedMatrix<ScalarType,DeviceMemoryType>::HostMirror outview = Kokkos::create_mirror_view(inview);
-        Kokkos::deep_copy (outview, inview);
-        return outview;
-    }
-
     /**
     @brief Copies a range of elements from a Kokkos array in device to host memory
     @details
@@ -263,24 +243,14 @@ namespace mpart{
     Kokkos::View<double**,Kokkos::CudaSpace> deviceView("Some stuff on the device", N1, N2);
     Kokkos::View<double*,Kokkos::HostSpace> hostView = ToHost(deviceView, 2, Kokkos::All() ); // Similar to python notation: deviceView[2,:]
     @endcode
-
-    @param[in] inview A kokkos array in device memory.
-    @param[in] sliceParams One or more parameters defining a Kokkos::subview.  See the [Kokkos Subview documentation](https://github.com/kokkos/kokkos/wiki/Subviews#112-how-to-take-a-subview) for more details.
-    @return A kokkos array in host memory.  Note that the layout (row-major or col-major) might be different than the default on the Host.  The layout will match the device's default layout.
-
-    @tparam DeviceMemoryType The memory space (e.g., Kokkos::CudaSpace) or the device
-    @tparam ScalarType The type and dimension of the Kokkos::View (e.g., double*, double**, or int*)
-    @tparam SliceTypes A variadic parameter pack containing options for constructing a Kokkos::subview of the device view.
     */
-    template<typename DeviceMemoryType, typename ScalarType, class... SliceTypes>
-    Kokkos::View<ScalarType, Kokkos::HostSpace> ToHost(Kokkos::View<ScalarType,DeviceMemoryType> const& inview, SliceTypes... sliceParams){
-        auto subview = Kokkos::subview(inview, sliceParams...); // Construct the subview
-        typename Kokkos::View<ScalarType>::HostMirror outview = Kokkos::create_mirror_view(subview);
-        Kokkos::deep_copy (outview, subview);
+    template<class ViewType>
+    typename ViewType::HostMirror ToHost(ViewType const& inview){
+        typename ViewType::HostMirror outview = Kokkos::create_mirror_view(inview);
+        Kokkos::deep_copy (outview, inview);
         return outview;
     }
 
-
 #if defined(MPART_ENABLE_GPU)
 
     /**
diff --git a/MParT/Utilities/LinearAlgebra.h b/MParT/Utilities/LinearAlgebra.h
index ae758c40..aec2d5c1 100644
--- a/MParT/Utilities/LinearAlgebra.h
+++ b/MParT/Utilities/LinearAlgebra.h
@@ -26,7 +26,8 @@ template<typename ViewType1, typename ViewType2>
 void AddInPlace(ViewType1 x, ViewType2 y) {
     constexpr size_t rank = GetViewRank<ViewType1>::Rank;
     if constexpr(rank == 1) {
-        Kokkos::parallel_for(x.extent(0), KOKKOS_LAMBDA(const int i){x(i) += y(i);});
+        Kokkos::RangePolicy<typename ViewType1::execution_space> policy(0,x.extent(0));
+        Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){x(i) += y(i);});
     } else if (rank == 2) {
         Kokkos::MDRangePolicy<Kokkos::Rank<2>,typename ViewType1::execution_space> policy({0, 0}, {x.extent(0), x.extent(1)});
         Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i, const int j){x(i, j) += y(i, j);});
@@ -286,4 +287,4 @@ class Cholesky
 
 } // namespace mpart
 
-#endif // #ifndef MPART_CUDALINEARALGEBRA_H
\ No newline at end of file
+#endif // #ifndef MPART_CUDALINEARALGEBRA_H
diff --git a/MParT/Utilities/Miscellaneous.h b/MParT/Utilities/Miscellaneous.h
index b4e3b290..fb7d7e13 100644
--- a/MParT/Utilities/Miscellaneous.h
+++ b/MParT/Utilities/Miscellaneous.h
@@ -19,22 +19,17 @@ namespace mpart{
                           std::string                                 const& key,
                           std::string                                 const& defaultValue);
 
-    /** Provides a mechanism for raising exceptions in CPU code where recovery is possible 
-        and assertions in GPU code where exceptions aren't alllowed.
-     */
-    template<typename MemorySpace, typename ErrorType>
-    struct ProcAgnosticError {
-        static void error(const char*) {
-            assert(false);
-        }
-    };
 
-    template<typename ErrorType>
-    struct ProcAgnosticError<Kokkos::HostSpace, ErrorType> {
-        static void error(const char* message) {
-            throw ErrorType(message);
+        template<typename ErrorType>
+        KOKKOS_INLINE_FUNCTION void ProcAgnosticError(const char* msg) {
+        KOKKOS_IF_ON_DEVICE(
+            Kokkos::abort(msg);
+        );
+        KOKKOS_IF_ON_HOST(
+            throw ErrorType(msg);
+        );
         }
-    };
+
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/MParT/Utilities/RootFinding.h b/MParT/Utilities/RootFinding.h
index 9ecdc82f..7149c480 100644
--- a/MParT/Utilities/RootFinding.h
+++ b/MParT/Utilities/RootFinding.h
@@ -125,7 +125,6 @@ KOKKOS_INLINE_FUNCTION double Find_x_ITP(double xlb, double xub, double yd, doub
 template<typename MemorySpace, typename FunctorType>
 KOKKOS_INLINE_FUNCTION double InverseSingleBracket(double yd, FunctorType f, double x0, const double xtol, const double ftol, int& info)
 {   
-    double stepSize=1.0;
     const unsigned int maxIts = 10000;
 
     // First, we need to find two points that bound the solution.
diff --git a/MParT/Utilities/Serialization.h b/MParT/Utilities/Serialization.h
index 5b889f2d..8c945e10 100644
--- a/MParT/Utilities/Serialization.h
+++ b/MParT/Utilities/Serialization.h
@@ -17,16 +17,17 @@
 
 namespace cereal {
     template <typename ScalarType, typename Archive, typename... Traits>
-    std::enable_if_t<traits::is_output_serializable<BinaryData<ScalarType>, Archive>::value && (std::is_same_v<Traits, Kokkos::HostSpace> || ...), void> save(
+    std::enable_if_t<traits::is_output_serializable<BinaryData<ScalarType>, Archive>::value, void> save(
         Archive &ar, Kokkos::View<ScalarType*, Traits...> const &vec) {
-        Kokkos::View<ScalarType*,Traits...> vec_h = vec;
+        Kokkos::View<ScalarType*,Traits...> vec_d = vec;
         static_assert(!(std::is_same_v<Traits, Kokkos::LayoutStride> || ...), "LayoutStride not supported");
 
         std::string name = vec.label();
         ar(name);
 
-        unsigned int sz = vec_h.extent(0);
+        unsigned int sz = vec_d.extent(0);
         ar(sz);
+        auto vec_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), vec_d);
 
         if(sz>0){
             ar(binary_data(vec_h.data(), sz * sizeof(std::remove_cv_t<ScalarType>)));
@@ -34,7 +35,7 @@ namespace cereal {
     }
 
     template<typename ScalarType, typename Archive, typename... Traits>
-    std::enable_if_t<traits::is_input_serializable<BinaryData<ScalarType>, Archive>::value && (std::is_same_v<Traits, Kokkos::HostSpace> || ...), void> load(
+    std::enable_if_t<traits::is_input_serializable<BinaryData<ScalarType>, Archive>::value, void> load(
         Archive &ar, Kokkos::View<ScalarType*, Traits...> &vec) {
         static_assert(!(std::is_same_v<Traits, Kokkos::LayoutStride> || ...), "LayoutStride not supported");
         
@@ -42,12 +43,16 @@ namespace cereal {
         ar(name);
         unsigned int sz;
         ar(sz);
-        Kokkos::View<ScalarType*,Traits...> vec_h (name, sz);
+        
+        Kokkos::View<ScalarType*,Traits...> vec_d (name, sz);
+        auto vec_h = Kokkos::create_mirror_view(Kokkos::HostSpace(), vec_d);
 
         if(sz>0){
             ar(binary_data(vec_h.data(), sz * sizeof(ScalarType)));
         }
-        vec = std::move(vec_h);
+        Kokkos::deep_copy(vec_d, vec_h);
+        vec = std::move(vec_d);
+
     }
 
     template<typename ScalarType, typename Archive, typename... Traits>
diff --git a/bindings/julia/src/ParameterizedFunctionBase.cpp b/bindings/julia/src/ParameterizedFunctionBase.cpp
index bae78cf7..1f38988e 100644
--- a/bindings/julia/src/ParameterizedFunctionBase.cpp
+++ b/bindings/julia/src/ParameterizedFunctionBase.cpp
@@ -8,7 +8,10 @@ void mpart::binding::ParameterizedFunctionBaseWrapper(jlcxx::Module &mod) {
     // ParameterizedFunctionBase
     mod.add_type<ParameterizedFunctionBase<Kokkos::HostSpace>>("ParameterizedFunctionBase")
         .method("CoeffMap" , [](ParameterizedFunctionBase<Kokkos::HostSpace> &pfb){ return KokkosToJulia(pfb.Coeffs()); })
-        .method("SetCoeffs", [](ParameterizedFunctionBase<Kokkos::HostSpace> &pfb, jlcxx::ArrayRef<double> v){ pfb.SetCoeffs(JuliaToKokkos(v)); })
+        .method("SetCoeffs", [](ParameterizedFunctionBase<Kokkos::HostSpace> &pfb, jlcxx::ArrayRef<double> v){ 
+	    Kokkos::View<const double*, Kokkos::HostSpace> ConstCoeffs = JuliaToKokkos(v);
+	    pfb.SetCoeffs(ConstCoeffs); 
+	})
         .method("numCoeffs", [](ParameterizedFunctionBase<Kokkos::HostSpace> &pfb) { return pfb.numCoeffs; })
         .method("inputDim" , [](ParameterizedFunctionBase<Kokkos::HostSpace> &pfb) { return pfb.inputDim; })
         .method("outputDim", [](ParameterizedFunctionBase<Kokkos::HostSpace> &pfb) { return pfb.outputDim; })
@@ -76,4 +79,4 @@ void mpart::binding::ParameterizedFunctionBaseWrapper(jlcxx::Module &mod) {
             std::cerr << "DeserializeMap: MParT was not compiled with Cereal support. Operation incomplete." << std::endl;
 #endif // MPART_HAS_CEREAL
     });
-}
\ No newline at end of file
+}
diff --git a/bindings/python/src/CommonPybindUtilities.cpp b/bindings/python/src/CommonPybindUtilities.cpp
index 0da023d1..4b221948 100644
--- a/bindings/python/src/CommonPybindUtilities.cpp
+++ b/bindings/python/src/CommonPybindUtilities.cpp
@@ -29,5 +29,5 @@ void mpart::binding::Initialize(py::dict opts) {
 void mpart::binding::CommonUtilitiesWrapper(py::module &m)
 {   
     m.def("Initialize", py::overload_cast<py::dict>( &mpart::binding::Initialize ));
-    m.def("Concurrency", &Kokkos::DefaultExecutionSpace::concurrency);
+    m.def("Concurrency", [](){return Kokkos::DefaultExecutionSpace::concurrency();});
 }
diff --git a/bindings/python/src/ConditionalMapBase.cpp b/bindings/python/src/ConditionalMapBase.cpp
index bf74aa1b..cacbf5b4 100644
--- a/bindings/python/src/ConditionalMapBase.cpp
+++ b/bindings/python/src/ConditionalMapBase.cpp
@@ -44,7 +44,7 @@ void mpart::binding::ConditionalMapBaseWrapper(py::module &m)
         .def("GetBaseFunction", &ConditionalMapBase<MemorySpace>::GetBaseFunction)
 #if defined(MPART_HAS_CEREAL)
         .def(py::pickle(
-            [](std::shared_ptr<ConditionalMapBase<Kokkos::HostSpace>> const& ptr) { // __getstate__
+            [](std::shared_ptr<ConditionalMapBase<MemorySpace>> const& ptr) { // __getstate__
                 std::stringstream ss;
                 ptr->Save(ss);
                 return py::bytes(ss.str());
@@ -54,12 +54,12 @@ void mpart::binding::ConditionalMapBaseWrapper(py::module &m)
                 std::stringstream ss;
                 ss.str(input);
 
-                auto ptr = std::dynamic_pointer_cast<ConditionalMapBase<Kokkos::HostSpace>>(ParameterizedFunctionBase<Kokkos::HostSpace>::Load(ss));
+                auto ptr = std::dynamic_pointer_cast<ConditionalMapBase<MemorySpace>>(ParameterizedFunctionBase<MemorySpace>::Load(ss));
                 return ptr;
             }
         ))
 #endif
-        ;
+    ;
 
 }
 
diff --git a/bindings/python/src/MapObjective.cpp b/bindings/python/src/MapObjective.cpp
index 7d5e2825..5a1f59f1 100644
--- a/bindings/python/src/MapObjective.cpp
+++ b/bindings/python/src/MapObjective.cpp
@@ -30,15 +30,15 @@ void mpart::binding::MapObjectiveWrapper(py::module &m) {
 
     py::class_<KLObjective<MemorySpace>, MapObjective<MemorySpace>, std::shared_ptr<KLObjective<MemorySpace>>>(m, t2Name.c_str());
     m.def(mName.c_str(), [](Eigen::Ref<Eigen::MatrixXd> &train, unsigned int dim){
-            StridedMatrix<const double, MemorySpace> trainView = MatToKokkos<double, MemorySpace>(train);
+            StridedMatrix<const double, MemorySpace> trainView = ConstColMatToKokkos<double, MemorySpace>(train);
             Kokkos::View<double**,MemorySpace> storeTrain ("Training data store", trainView.extent(0), trainView.extent(1));
             Kokkos::deep_copy(storeTrain, trainView);
             trainView = storeTrain;
             return ObjectiveFactory::CreateGaussianKLObjective(trainView, dim);
         }, "train"_a, "dim"_a = 0)
         .def(mName.c_str(), [](Eigen::Ref<Eigen::MatrixXd> &trainEig, Eigen::Ref<Eigen::MatrixXd> &testEig, unsigned int dim){
-            StridedMatrix<const double, MemorySpace> trainView = MatToKokkos<double, MemorySpace>(trainEig);
-            StridedMatrix<const double, MemorySpace> testView = MatToKokkos<double, MemorySpace>(testEig);
+            StridedMatrix<const double, MemorySpace> trainView = ConstColMatToKokkos<double, MemorySpace>(trainEig);
+            StridedMatrix<const double, MemorySpace> testView = ConstColMatToKokkos<double, MemorySpace>(testEig);
             Kokkos::View<double**,MemorySpace> storeTrain ("Training data store", trainView.extent(0), trainView.extent(1));
             Kokkos::View<double**,MemorySpace> storeTest ("Testing data store", testView.extent(0), testView.extent(1));
             Kokkos::deep_copy(storeTrain, trainView);
diff --git a/bindings/python/src/ParameterizedFunctionBase.cpp b/bindings/python/src/ParameterizedFunctionBase.cpp
index 3fe63c70..ba40b64d 100644
--- a/bindings/python/src/ParameterizedFunctionBase.cpp
+++ b/bindings/python/src/ParameterizedFunctionBase.cpp
@@ -93,7 +93,7 @@ void mpart::binding::ParameterizedFunctionBaseWrapper<mpart::DeviceSpace>(py::mo
     // ParameterizedFunctionBase
     py::class_<ParameterizedFunctionBase<mpart::DeviceSpace>, std::shared_ptr<ParameterizedFunctionBase<mpart::DeviceSpace>>>(m, "dParameterizedFunctionBase")
         .def("CoeffMap", [](const ParameterizedFunctionBase<mpart::DeviceSpace> &f) {
-            Kokkos::View<const double*, Kokkos::HostSpace> host_coeffs = ToHost<mpart::DeviceSpace, const double*>( f.Coeffs() );
+            Kokkos::View<const double*, Kokkos::HostSpace> host_coeffs = ToHost( f.Coeffs() );
             return Eigen::VectorXd(Eigen::Map<const Eigen::VectorXd>(host_coeffs.data(), host_coeffs.size()));
         })
         .def("SetCoeffs", py::overload_cast<Eigen::Ref<Eigen::VectorXd>>(&ParameterizedFunctionBase<mpart::DeviceSpace>::SetCoeffs))
diff --git a/bindings/python/src/Serialization.cpp b/bindings/python/src/Serialization.cpp
index ee1c251a..12924bf6 100644
--- a/bindings/python/src/Serialization.cpp
+++ b/bindings/python/src/Serialization.cpp
@@ -16,8 +16,8 @@ namespace py = pybind11;
 using namespace mpart;
 using namespace mpart::binding;
 
-template<>
-void mpart::binding::DeserializeWrapper<Kokkos::HostSpace>(py::module &m)
+template<typename MemorySpace>
+void mpart::binding::DeserializeWrapper(py::module &m)
 {
     m.def("DeserializeMap",
     [](std::string const &filename) {
@@ -25,8 +25,13 @@ void mpart::binding::DeserializeWrapper<Kokkos::HostSpace>(py::module &m)
         cereal::BinaryInputArchive archive(is);
         unsigned int inputDim, outputDim, numCoeffs;
         archive(inputDim, outputDim, numCoeffs);
-        Kokkos::View<double*, Kokkos::HostSpace> coeffs ("Map coeffs", numCoeffs);
+        Kokkos::View<double*, MemorySpace> coeffs ("Map coeffs", numCoeffs);
         load(archive, coeffs);
         return std::make_tuple(inputDim, outputDim, CopyKokkosToVec(coeffs));
     });
-}
\ No newline at end of file
+}
+
+template void mpart::binding::DeserializeWrapper<Kokkos::HostSpace>(py::module&);
+#if defined(MPART_ENABLE_GPU)
+template void mpart::binding::DeserializeWrapper<mpart::DeviceSpace>(py::module&);
+#endif
diff --git a/bindings/python/src/TrainMapAdaptive.cpp b/bindings/python/src/TrainMapAdaptive.cpp
index cbf209ca..3a23a291 100644
--- a/bindings/python/src/TrainMapAdaptive.cpp
+++ b/bindings/python/src/TrainMapAdaptive.cpp
@@ -58,5 +58,6 @@ void mpart::binding::TrainMapAdaptiveWrapper(py::module &m) {
 
 template void mpart::binding::TrainMapAdaptiveWrapper<Kokkos::HostSpace>(py::module&);
 #if defined(MPART_ENABLE_GPU)
-template void mpart::binding::TrainMapAdaptiveWrapper<mpart::DeviceSpace>(py::module&);
+// TODO: Implement TrainMap for GPU
+// template void mpart::binding::TrainMapAdaptiveWrapper<mpart::DeviceSpace>(py::module&);
 #endif // MPART_ENABLE_GPU
\ No newline at end of file
diff --git a/bindings/python/src/TriangularMap.cpp b/bindings/python/src/TriangularMap.cpp
index 578a85fc..1091f476 100644
--- a/bindings/python/src/TriangularMap.cpp
+++ b/bindings/python/src/TriangularMap.cpp
@@ -20,9 +20,9 @@ void mpart::binding::TriangularMapWrapper(py::module &m)
     py::class_<TriangularMap<MemorySpace>, ConditionalMapBase<MemorySpace>, ParameterizedFunctionBase<MemorySpace>, std::shared_ptr<TriangularMap<MemorySpace>>>(m, tName.c_str())
         .def(py::init<std::vector<std::shared_ptr<ConditionalMapBase<MemorySpace>>>, bool>(), py::arg("comps"), py::arg("moveCoeffs") = false)
         .def("GetComponent", &TriangularMap<MemorySpace>::GetComponent)
-        #if defined(MPART_HAS_CEREAL)
+#if defined(MPART_HAS_CEREAL)
         .def(py::pickle(
-            [](std::shared_ptr<TriangularMap<Kokkos::HostSpace>> const& ptr) { // __getstate__
+            [](std::shared_ptr<TriangularMap<MemorySpace>> const& ptr) { // __getstate__
                 std::stringstream ss;
                 ptr->Save(ss);
                 return py::bytes(ss.str());
@@ -32,12 +32,12 @@ void mpart::binding::TriangularMapWrapper(py::module &m)
                 std::stringstream ss;
                 ss.str(input);
 
-                auto ptr = std::dynamic_pointer_cast<TriangularMap<Kokkos::HostSpace>>(ParameterizedFunctionBase<Kokkos::HostSpace>::Load(ss));
+                auto ptr = std::dynamic_pointer_cast<TriangularMap<MemorySpace>>(ParameterizedFunctionBase<MemorySpace>::Load(ss));
                 return ptr;
             }
         ))
-        #endif
-        ;
+#endif
+    ;
 
 }
 
diff --git a/bindings/python/src/Wrapper.cpp b/bindings/python/src/Wrapper.cpp
index eee93e63..cbb58659 100644
--- a/bindings/python/src/Wrapper.cpp
+++ b/bindings/python/src/Wrapper.cpp
@@ -40,8 +40,9 @@ PYBIND11_MODULE(pympart, m) {
     MapFactoryWrapper<mpart::DeviceSpace>(m);
     AffineMapWrapperDevice(m);
     AffineFunctionWrapperDevice(m);
-    IdentityMapWrapper<Kokkos::DeviceSpace>(m);
-    SerializeWrapper<mpart::DeviceSpace>(m);
+    IdentityMapWrapper<mpart::DeviceSpace>(m);
+#if defined(MPART_HAS_CEREAL)
     DeserializeWrapper<mpart::DeviceSpace>(m);
 #endif
+#endif
 }
\ No newline at end of file
diff --git a/src/AffineMap.cpp b/src/AffineMap.cpp
index 1e4b7baa..a23c7201 100644
--- a/src/AffineMap.cpp
+++ b/src/AffineMap.cpp
@@ -56,7 +56,8 @@ template<typename MemorySpace>
 void AffineMap<MemorySpace>::LogDeterminantImpl(StridedMatrix<const double, MemorySpace> const& pts,
                                                 StridedVector<double, MemorySpace>              output)
 {
-    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy(0,output.size());
+    unsigned int N = output.size();
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy{0u, N};
 
     Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int& i) {
         output(i) = logDet_;
diff --git a/src/ComposedMap.cpp b/src/ComposedMap.cpp
index 503b100c..a681ffc4 100644
--- a/src/ComposedMap.cpp
+++ b/src/ComposedMap.cpp
@@ -178,7 +178,7 @@ ComposedMap<MemorySpace>::ComposedMap(std::vector<std::shared_ptr<ConditionalMap
 }
 
 template<typename MemorySpace>
-void ComposedMap<MemorySpace>::SetCoeffs(Kokkos::View<const double*, Kokkos::HostSpace> coeffs)
+void ComposedMap<MemorySpace>::SetCoeffs(Kokkos::View<const double*, MemorySpace> coeffs)
 {
     // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable
     ConditionalMapBase<MemorySpace>::SetCoeffs(coeffs);
@@ -195,7 +195,7 @@ void ComposedMap<MemorySpace>::SetCoeffs(Kokkos::View<const double*, Kokkos::Hos
 }
 
 template<typename MemorySpace>
-void ComposedMap<MemorySpace>::WrapCoeffs(Kokkos::View<double*, Kokkos::HostSpace> coeffs)
+void ComposedMap<MemorySpace>::WrapCoeffs(Kokkos::View<double*, MemorySpace> coeffs)
 {
     // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable
     ConditionalMapBase<MemorySpace>::WrapCoeffs(coeffs);
@@ -212,19 +212,36 @@ void ComposedMap<MemorySpace>::WrapCoeffs(Kokkos::View<double*, Kokkos::HostSpac
 }
 
 #if defined(MPART_ENABLE_GPU)
-template<typename MemorySpace>
-void ComposedMap<MemorySpace>::SetCoeffs(Kokkos::View<const double*, Kokkos::DefaultExecutionSpace::memory_space> coeffs)
+template<>
+void ComposedMap<Kokkos::HostSpace>::SetCoeffs(Kokkos::View<const double*, mpart::DeviceSpace> coeffs)
 {
     // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable
-    ConditionalMapBase<MemorySpace>::SetCoeffs(coeffs);
+     ConditionalMapBase<Kokkos::HostSpace>::SetCoeffs(coeffs);
 
     // Now create subviews for each of the maps
     unsigned int cumNumCoeffs = 0;
     for(unsigned int i=0; i<maps_.size(); ++i){
         assert(cumNumCoeffs+maps_.at(i)->numCoeffs <= this->savedCoeffs.size());
 
-        maps_.at(i)->savedCoeffs = Kokkos::subview(this->savedCoeffs,
-            std::make_pair(cumNumCoeffs, cumNumCoeffs+maps_.at(i)->numCoeffs));
+        maps_.at(i)->WrapCoeffs(Kokkos::subview(this->savedCoeffs,
+            std::make_pair(cumNumCoeffs, cumNumCoeffs+maps_.at(i)->numCoeffs)));
+        cumNumCoeffs += maps_.at(i)->numCoeffs;
+    }
+}
+
+template<>
+void ComposedMap<mpart::DeviceSpace>::SetCoeffs(Kokkos::View<const double*, Kokkos::HostSpace> coeffs)
+{
+    // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable
+     ConditionalMapBase<mpart::DeviceSpace>::SetCoeffs(coeffs);
+
+    // Now create subviews for each of the maps
+    unsigned int cumNumCoeffs = 0;
+    for(unsigned int i=0; i<maps_.size(); ++i){
+        assert(cumNumCoeffs+maps_.at(i)->numCoeffs <= this->savedCoeffs.size());
+
+        maps_.at(i)->WrapCoeffs(Kokkos::subview(this->savedCoeffs,
+            std::make_pair(cumNumCoeffs, cumNumCoeffs+maps_.at(i)->numCoeffs)));
         cumNumCoeffs += maps_.at(i)->numCoeffs;
     }
 }
@@ -471,4 +488,4 @@ void ComposedMap<MemorySpace>::LogDeterminantInputGradImpl(StridedMatrix<const d
 template class mpart::ComposedMap<Kokkos::HostSpace>;
 #if defined(MPART_ENABLE_GPU)
     template class mpart::ComposedMap<DeviceSpace>;
-#endif
\ No newline at end of file
+#endif
diff --git a/src/ConditionalMapBase.cpp b/src/ConditionalMapBase.cpp
index ff09ddc0..7b2a092a 100644
--- a/src/ConditionalMapBase.cpp
+++ b/src/ConditionalMapBase.cpp
@@ -250,12 +250,13 @@ StridedMatrix<double, mpart::DeviceSpace> ConditionalMapBase<mpart::DeviceSpace>
     return output;
 }
 
+
 template<>
 template<>
 StridedMatrix<double, Kokkos::HostSpace> ConditionalMapBase<mpart::DeviceSpace>::LogDeterminantInputGrad(StridedMatrix<const double, Kokkos::HostSpace> const& pts)
 {
     // Copy the points to the device space
-    StridedMatrix<const double, mpart::DeviceSpace> pts_device = ToDevice<mpart::DeviceSpace,const double>(pts);
+    StridedMatrix<const double, mpart::DeviceSpace> pts_device = ToDevice<mpart::DeviceSpace>(pts);
 
     // Evaluate on the device space
     StridedMatrix<double, mpart::DeviceSpace> evals_device = this->LogDeterminantInputGrad(pts_device);
@@ -275,7 +276,7 @@ StridedMatrix<double, mpart::DeviceSpace> ConditionalMapBase<Kokkos::HostSpace>:
     StridedMatrix<double, Kokkos::HostSpace> evals_host = this->LogDeterminantInputGrad(pts_host);
 
     // Copy back to the device
-    return ToDevice<mpart::DeviceSpace,double>(evals_host);
+    return ToDevice<mpart::DeviceSpace>(evals_host);
 }
 
 
@@ -296,4 +297,4 @@ Eigen::RowMatrixXd ConditionalMapBase<mpart::DeviceSpace>::LogDeterminantInputGr
 template class mpart::ConditionalMapBase<Kokkos::HostSpace>;
 #if defined(MPART_ENABLE_GPU)
     template class mpart::ConditionalMapBase<DeviceSpace>;
-#endif
\ No newline at end of file
+#endif
diff --git a/src/Distributions/DensityBase.cpp b/src/Distributions/DensityBase.cpp
index c313732c..e12daf05 100644
--- a/src/Distributions/DensityBase.cpp
+++ b/src/Distributions/DensityBase.cpp
@@ -1,49 +1,102 @@
 #include "MParT/Distributions/DensityBase.h"
+#include "MParT/Utilities/ArrayConversions.h"
 
 using namespace mpart;
 
-template<>
-Eigen::VectorXd DensityBase<Kokkos::HostSpace>::LogDensity(Eigen::Ref<const Eigen::RowMatrixXd> const &pts) {
+template<typename MemorySpace>
+Eigen::VectorXd DensityBase<MemorySpace>::LogDensity(Eigen::Ref<const Eigen::RowMatrixXd> const &pts) {
     // Allocate output
     Eigen::VectorXd output(pts.cols());
-    StridedMatrix<const double, Kokkos::HostSpace> ptsView = ConstRowMatToKokkos<double, Kokkos::HostSpace>(pts);
-    StridedVector<double, Kokkos::HostSpace> outView = VecToKokkos<double, Kokkos::HostSpace>(output);
+    StridedVector<double, Kokkos::HostSpace> outView_h = VecToKokkos<double, Kokkos::HostSpace>(output);
+    StridedMatrix<const double, MemorySpace> ptsView_d = ConstRowMatToKokkos<double, MemorySpace>(pts);
+    StridedVector<double, MemorySpace> outView_d;
+    if constexpr(std::is_same_v<MemorySpace, Kokkos::HostSpace>) outView_d = outView_h;
+    else outView_d = Kokkos::View<double*, MemorySpace> {"Outview device", ptsView_d.extent(1)};
     // Call the LogDensity function
-    LogDensityImpl(ptsView, outView);
-
+    LogDensityImpl(ptsView_d, outView_d);
+    Kokkos::deep_copy(outView_h, outView_d);
     return output;
 }
 
-template<>
-Eigen::RowMatrixXd DensityBase<Kokkos::HostSpace>::LogDensityInputGrad(Eigen::Ref<const Eigen::RowMatrixXd> const &pts) {
+template<typename MemorySpace>
+Eigen::RowMatrixXd DensityBase<MemorySpace>::LogDensityInputGrad(Eigen::Ref<const Eigen::RowMatrixXd> const &pts) {
     // Allocate output
     Eigen::RowMatrixXd output(pts.rows(), pts.cols());
-    StridedMatrix<const double, Kokkos::HostSpace> ptsView = ConstRowMatToKokkos<double, Kokkos::HostSpace>(pts);
-    StridedMatrix<double, Kokkos::HostSpace> outView = MatToKokkos<double, Kokkos::HostSpace>(output);
+    StridedMatrix<double, Kokkos::HostSpace> outView_h = MatToKokkos<double, Kokkos::HostSpace>(output);
+    StridedMatrix<const double, MemorySpace> ptsView_d = ConstRowMatToKokkos<double, MemorySpace>(pts);
+    StridedMatrix<double, MemorySpace> outView_d;
+    if constexpr(std::is_same_v<MemorySpace, Kokkos::HostSpace>) outView_d = outView_h;
+    else outView_d = MatToKokkos<double, MemorySpace>(output); // TODO: Could be optimized
     // Call the LogDensity function
-    LogDensityInputGradImpl(ptsView, outView);
-
+    LogDensityInputGradImpl(ptsView_d, outView_d);
+    Kokkos::deep_copy(outView_h, outView_d);
     return output;
 }
 
-template<>
-template<>
-StridedVector<double, Kokkos::HostSpace> DensityBase<Kokkos::HostSpace>::LogDensity<Kokkos::HostSpace>(StridedMatrix<const double, Kokkos::HostSpace> const &X) {
+template<typename MemorySpace>
+StridedVector<double, MemorySpace> DensityBase<MemorySpace>::LogDensity(StridedMatrix<const double, MemorySpace> const &X) {
     // Allocate output
-    Kokkos::View<double*, Kokkos::HostSpace> output("output", X.extent(1));
+    Kokkos::View<double*, MemorySpace> output("output", X.extent(1));
     // Call the LogDensity function
     LogDensityImpl(X, output);
+    return output;
+}
+
+template<typename MemorySpace>
+StridedMatrix<double, MemorySpace> DensityBase<MemorySpace>::LogDensityInputGrad(StridedMatrix<const double, MemorySpace> const &X) {
+    // Allocate output
+    Kokkos::View<double**, MemorySpace> output("output", X.extent(0), X.extent(1));
+    // Call the LogDensity function
+    LogDensityInputGradImpl(X, output);
 
     return output;
 }
 
+#if defined(MPART_ENABLE_GPU)
+
 template<>
+StridedVector<double, Kokkos::HostSpace> DensityBase<DeviceSpace>::LogDensity(StridedMatrix<const double, Kokkos::HostSpace> const &X) {
+    StridedMatrix<const double, DeviceSpace> X_d = ToDevice<DeviceSpace>(X);
+    // Allocate output
+    Kokkos::View<double*, DeviceSpace> output_d("output", X.extent(1));
+    // Call the LogDensity function
+    LogDensityImpl(X_d, output_d);
+    return ToHost(output_d);
+}
+
 template<>
-StridedMatrix<double, Kokkos::HostSpace> DensityBase<Kokkos::HostSpace>::LogDensityInputGrad<Kokkos::HostSpace>(StridedMatrix<const double, Kokkos::HostSpace> const &X) {
+StridedVector<double, DeviceSpace> DensityBase<Kokkos::HostSpace>::LogDensity(StridedMatrix<const double, DeviceSpace> const &X) {
+    StridedMatrix<const double, Kokkos::HostSpace> X_h = ToHost(X);
     // Allocate output
-    Kokkos::View<double**, Kokkos::HostSpace> output("output", X.extent(0), X.extent(1));
+    Kokkos::View<double*, Kokkos::HostSpace> output_h("output", X.extent(1));
     // Call the LogDensity function
-    LogDensityInputGradImpl(X, output);
+    LogDensityImpl(X_h, output_h);
+    return ToDevice<DeviceSpace>(output_h);
+}
 
-    return output;
-}
\ No newline at end of file
+template<>
+StridedMatrix<double, Kokkos::HostSpace> DensityBase<DeviceSpace>::LogDensityInputGrad(StridedMatrix<const double, Kokkos::HostSpace> const &X) {
+    StridedMatrix<const double, DeviceSpace> X_d = ToDevice<DeviceSpace>(X);
+    // Allocate output
+    Kokkos::View<double**, DeviceSpace> output_d("output", X.extent(0), X.extent(1));
+    // Call the LogDensityInputGrad function
+    LogDensityInputGradImpl(X_d, output_d);
+    return ToHost(output_d);
+}
+
+template<>
+StridedMatrix<double, DeviceSpace> DensityBase<Kokkos::HostSpace>::LogDensityInputGrad(StridedMatrix<const double, DeviceSpace> const &X) {
+    StridedMatrix<const double, Kokkos::HostSpace> X_h = ToHost(X);
+    // Allocate output
+    Kokkos::View<double**, Kokkos::HostSpace> output_h("output", X.extent(0), X.extent(1));
+    // Call the LogDensityInputGrad function
+    LogDensityInputGradImpl(X_h, output_h);
+    return ToDevice<DeviceSpace>(output_h);
+}
+
+#endif
+
+template class mpart::DensityBase<Kokkos::HostSpace>;
+#if defined(MPART_ENABLE_GPU)
+    template class mpart::DensityBase<mpart::DeviceSpace>;
+#endif
\ No newline at end of file
diff --git a/src/Distributions/GaussianSamplerDensity.cpp b/src/Distributions/GaussianSamplerDensity.cpp
index 9d991842..22d05196 100644
--- a/src/Distributions/GaussianSamplerDensity.cpp
+++ b/src/Distributions/GaussianSamplerDensity.cpp
@@ -21,21 +21,21 @@ GaussianSamplerDensity<MemorySpace>::GaussianSamplerDensity(unsigned int dim): S
 template<typename MemorySpace>
 void GaussianSamplerDensity<MemorySpace>::LogDensityImpl(StridedMatrix<const double, MemorySpace> const &pts, StridedVector<double, MemorySpace> output) {
     // Compute the log density
-    int M = pts.extent(0);
-    int N = pts.extent(1);
+    unsigned int M = pts.extent(0);
+    unsigned int N = pts.extent(1);
     if(M != dim_) {
         throw std::runtime_error("GaussianSamplerDensity::LogDensityImpl: The number of rows in pts must match the dimension of the distribution.");
     }
-    Kokkos::MDRangePolicy<Kokkos::Rank<2>, typename MemoryToExecution<MemorySpace>::Space> policy({{0, 0}}, {{N, M}});
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>, typename MemoryToExecution<MemorySpace>::Space> policy {{0u, 0u}, {N, M}};
     Kokkos::View<double**, Kokkos::LayoutLeft, MemorySpace> diff ("diff", M, N);
 
     if(mean_.extent(0) == 0){
-        Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int& j, const int& i) {
+        Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int& j, const int& i) {
             diff(i,j) = pts(i,j);
         });
     }
     else {
-        Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int& j, const int& i) {
+        Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int& j, const int& i) {
             diff(i,j) = pts(i,j) - mean_(i);
         });
     }
@@ -44,7 +44,8 @@ void GaussianSamplerDensity<MemorySpace>::LogDensityImpl(StridedMatrix<const dou
         covChol_.solveInPlaceL(diff);
     }
 
-    Kokkos::parallel_for(N, KOKKOS_LAMBDA(const int& j){
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy1d{0u, N};
+    Kokkos::parallel_for(policy1d, KOKKOS_CLASS_LAMBDA(const int& j){
         output(j) = -0.5*( M*logtau_ + logDetCov_ );
         for(int d=0; d<M; ++d){
             output(j) += -0.5*diff(d,j)*diff(d,j);
@@ -63,12 +64,12 @@ void GaussianSamplerDensity<MemorySpace>::LogDensityInputGradImpl(StridedMatrix<
     Kokkos::MDRangePolicy<Kokkos::Rank<2>, typename MemoryToExecution<MemorySpace>::Space> policy({{0, 0}}, {{N, M}});
 
     if(mean_.extent(0) == 0){
-        Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int& j, const int& i) {
+        Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int& j, const int& i) {
             output(i,j) = -pts(i,j);
         });
     }
     else {
-        Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int& j, const int& i) {
+        Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int& j, const int& i) {
             output(i,j) = -pts(i,j) + mean_(i);
         });
     }
@@ -94,7 +95,7 @@ void GaussianSamplerDensity<MemorySpace>::SampleImpl(StridedMatrix<double, Memor
     if(idCov_) {
         // If dim_ is 0, the distribution is the standard normal
         if(mean_.extent(0) == 0) {
-            Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int j, const int i) {
+            Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int j, const int i) {
                 GeneratorType rgen = rand_pool.get_state();
                 output_(i,j) = rgen.normal();
                 rand_pool.free_state(rgen);
@@ -102,7 +103,7 @@ void GaussianSamplerDensity<MemorySpace>::SampleImpl(StridedMatrix<double, Memor
         }
         // Otherwise, we sample from the mean-shifted normal
         else {
-            Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int j, const int i) {
+            Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int j, const int i) {
                 GeneratorType rgen = rand_pool.get_state();
                 output_(i,j) = rgen.normal() + mean_(i);
                 rand_pool.free_state(rgen);
@@ -114,7 +115,7 @@ void GaussianSamplerDensity<MemorySpace>::SampleImpl(StridedMatrix<double, Memor
         // Enforce that the output is the correct layout!
         Kokkos::View<double**, Kokkos::LayoutLeft, MemorySpace> output = output_;
         // Sample from the standard normal
-        Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int j, const int i) {
+        Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int j, const int i) {
             GeneratorType rgen = rand_pool.get_state();
             output(i,j) = rgen.normal();
             rand_pool.free_state(rgen);
@@ -126,7 +127,7 @@ void GaussianSamplerDensity<MemorySpace>::SampleImpl(StridedMatrix<double, Memor
             Kokkos::deep_copy(output, mul);
         }
         else {
-            Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int j, const int i) {
+            Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int j, const int i) {
                 output(i,j) = mul(i,j) + mean_(i);
             });
         }
@@ -136,4 +137,4 @@ void GaussianSamplerDensity<MemorySpace>::SampleImpl(StridedMatrix<double, Memor
 template struct mpart::GaussianSamplerDensity<Kokkos::HostSpace>;
 #ifdef MPART_ENABLE_GPU
 template struct mpart::GaussianSamplerDensity<mpart::DeviceSpace>;
-#endif
\ No newline at end of file
+#endif
diff --git a/src/InnerMarginalAffineMap.cpp b/src/InnerMarginalAffineMap.cpp
index d20e9993..ea225f64 100644
--- a/src/InnerMarginalAffineMap.cpp
+++ b/src/InnerMarginalAffineMap.cpp
@@ -7,6 +7,17 @@
 
 using namespace mpart;
 
+template<typename MemorySpace>
+double CalculateLogDet(StridedVector<double, MemorySpace> scale) {
+    double logDet_ = 0.;
+    unsigned int N = scale.size();
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy {0, N};
+    Kokkos::parallel_reduce(policy, KOKKOS_LAMBDA(const int i, double& ldet){
+        ldet += Kokkos::log(scale(i));
+    }, logDet_);
+    return logDet_;
+}
+
 template<typename MemorySpace>
 InnerMarginalAffineMap<MemorySpace>::InnerMarginalAffineMap(StridedVector<double,MemorySpace> scale,
                                   StridedVector<double,MemorySpace> shift,
@@ -20,14 +31,11 @@ InnerMarginalAffineMap<MemorySpace>::InnerMarginalAffineMap(StridedVector<double
     Kokkos::deep_copy(scale_, scale);
     Kokkos::deep_copy(shift_, shift);
     if (scale_.size() != shift_.size())
-        ProcAgnosticError<MemorySpace, std::runtime_error>::error("InnerMarginalAffineMap: scale and shift must have the same size");
+        ProcAgnosticError<std::runtime_error>("InnerMarginalAffineMap: scale and shift must have the same size");
     if (scale_.size() != map->inputDim)
-        ProcAgnosticError<MemorySpace, std::runtime_error>::error("InnerMarginalAffineMap: scale and shift must have the same size as the input dimension of the map");
+        ProcAgnosticError<std::runtime_error>("InnerMarginalAffineMap: scale and shift must have the same size as the input dimension of the map");
 
-    logDet_ = 0.;
-    Kokkos::parallel_reduce("InnerMarginalAffineMap logdet", scale.extent(0), KOKKOS_LAMBDA(const int&i, double& ldet){
-        ldet += Kokkos::log(scale_(i));
-    }, logDet_);
+    logDet_ = CalculateLogDet(scale);
     this->SetCoeffs(map->Coeffs());
     if (moveCoeffs) {
         map->WrapCoeffs(this->savedCoeffs);
@@ -38,14 +46,16 @@ template<typename MemorySpace>
 void InnerMarginalAffineMap<MemorySpace>::LogDeterminantImpl(StridedMatrix<const double, MemorySpace> const& pts,
                                                 StridedVector<double, MemorySpace>              output)
 {
-    int n1 = pts.extent(0), n2 = pts.extent(1);
+    unsigned int n1 = pts.extent(0), n2 = pts.extent(1);
     Kokkos::View<double**, MemorySpace> tmp("tmp", n1, n2);
     Kokkos::MDRangePolicy<Kokkos::Rank<2>, typename MemoryToExecution<MemorySpace>::Space> policy({0, 0}, {n1, n2});
     Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int& i, const int& j) {
         tmp(i,j) = pts(i,j)*scale_(i) + shift_(i);
     });
     map_->LogDeterminantImpl(tmp, output);
-    Kokkos::parallel_for("InnerMarginalAffineMap LogDeterminant", output.size(), KOKKOS_CLASS_LAMBDA(const int& i) {
+    unsigned int N = output.size();
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy1d {0, N};
+    Kokkos::parallel_for("InnerMarginalAffineMap LogDeterminant", policy1d, KOKKOS_CLASS_LAMBDA(const int& i) {
         output(i) += logDet_;
     });
 }
@@ -55,7 +65,7 @@ void InnerMarginalAffineMap<MemorySpace>::InverseImpl(StridedMatrix<const double
                                          StridedMatrix<const double, MemorySpace> const& r,
                                          StridedMatrix<double, MemorySpace>              output)
 {
-    int x1_n1 = this->inputDim - this->outputDim, x1_n2 = x1.extent(1);
+    unsigned int x1_n1 = this->inputDim - this->outputDim, x1_n2 = x1.extent(1);
     Kokkos::View<const double**, MemorySpace> x1_tmp;
     if(x1_n1 > 0) {
         Kokkos::View<double**, MemorySpace> x1_t("x1_tmp", x1_n1, x1_n2);
diff --git a/src/MapFactory.cpp b/src/MapFactory.cpp
index d810933f..1382148f 100644
--- a/src/MapFactory.cpp
+++ b/src/MapFactory.cpp
@@ -53,7 +53,7 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> mpart::MapFactory::CreateSingle
     if(activeInd == 1){  // special case if activeInd = 1, map is of form [T_1; Id]
 
         // Bottom identity map
-        std::shared_ptr<ConditionalMapBase<MemorySpace>> botIdMap = std::make_shared<IdentityMap<Kokkos::HostSpace>>(dim, dim-activeInd);
+        std::shared_ptr<ConditionalMapBase<MemorySpace>> botIdMap = std::make_shared<IdentityMap<MemorySpace>>(dim, dim-activeInd);
 
         // fill a vector of components with identity, active component, identity
         std::vector<std::shared_ptr<ConditionalMapBase<MemorySpace>>> blocks(2);
@@ -67,7 +67,7 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> mpart::MapFactory::CreateSingle
     }
     else if (activeInd == dim){  // special case if activeInd = dim, map is of form [Id; T_d]
         // Top identity map
-        std::shared_ptr<ConditionalMapBase<MemorySpace>> topIdMap = std::make_shared<IdentityMap<Kokkos::HostSpace>>(activeInd-1, activeInd-1);
+        std::shared_ptr<ConditionalMapBase<MemorySpace>> topIdMap = std::make_shared<IdentityMap<MemorySpace>>(activeInd-1, activeInd-1);
 
         // fill a vector of components with identity, active component, identity
         std::vector<std::shared_ptr<ConditionalMapBase<MemorySpace>>> blocks(2);
@@ -80,10 +80,10 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> mpart::MapFactory::CreateSingle
     else{ // general case, map is of form [Id; T_i; Id]
 
         // Top identity map
-        std::shared_ptr<ConditionalMapBase<MemorySpace>> topIdMap = std::make_shared<IdentityMap<Kokkos::HostSpace>>(activeInd-1, activeInd-1);
+        std::shared_ptr<ConditionalMapBase<MemorySpace>> topIdMap = std::make_shared<IdentityMap<MemorySpace>>(activeInd-1, activeInd-1);
 
         // Bottom identity map
-        std::shared_ptr<ConditionalMapBase<MemorySpace>> botIdMap = std::make_shared<IdentityMap<Kokkos::HostSpace>>(dim, dim-activeInd);
+        std::shared_ptr<ConditionalMapBase<MemorySpace>> botIdMap = std::make_shared<IdentityMap<MemorySpace>>(dim, dim-activeInd);
 
         // fill a vector of components with identity, active component, identity
         std::vector<std::shared_ptr<ConditionalMapBase<MemorySpace>>> blocks(3);
@@ -95,8 +95,8 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> mpart::MapFactory::CreateSingle
         output = std::make_shared<TriangularMap<MemorySpace>>(blocks);
 
     }
-
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", output->numCoeffs));
+    Kokkos::View<const double*, MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", output->numCoeffs);
+    output->SetCoeffs(coeffs);
     return output;
 
 }
@@ -118,7 +118,9 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> mpart::MapFactory::CreateTriang
         comps.at(i) = CreateComponent<MemorySpace>(mset.ToDevice<MemorySpace>(), options);
     }
     auto output = std::make_shared<TriangularMap<MemorySpace>>(comps);
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", output->numCoeffs));
+    
+    Kokkos::View<const double*, MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", output->numCoeffs);
+    output->SetCoeffs(coeffs);
     return output;
 }
 
@@ -160,7 +162,8 @@ std::shared_ptr<ParameterizedFunctionBase<MemorySpace>> mpart::MapFactory::Creat
     }
 
     if(output){
-        output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", output->numCoeffs));
+        Kokkos::View<const double*, MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", output->numCoeffs);
+    	output->SetCoeffs(coeffs);
         return output;
     }
 
@@ -184,10 +187,12 @@ Sigmoid1d<MemorySpace, SigmoidType, EdgeType> CreateSigmoid(
         std::stringstream ss;
         ss << "Incorrect length of centers, " << centers.size() << ".\n";
         ss << "Length should be of form 2+(1+2+3+...+n) for some order n";
-        ProcAgnosticError<MemorySpace, std::invalid_argument>::error(
+        ProcAgnosticError<std::invalid_argument>(
             ss.str().c_str());
     }
-    Kokkos::parallel_for(max_order+2, KOKKOS_LAMBDA(unsigned int i) {
+    unsigned int N = max_order+2; 
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy{0, N};
+    Kokkos::parallel_for(policy, KOKKOS_LAMBDA(unsigned int i) {
         if (i == max_order) {
             widths(0) = edgeShape;
             weights(0) = 1./edgeShape;
@@ -233,7 +238,7 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateSigmoidExpansionTemplate(
         ss << "Mismatched input dimensions for offdiag and diag multiindex sets\n"
         << "offdiag: " << mset_offdiag.Length() << "\n"
         << "diag: " << mset_diag.Length();
-        ProcAgnosticError<MemorySpace, std::invalid_argument>::error(ss.str().c_str());
+        ProcAgnosticError<std::invalid_argument>(ss.str().c_str());
     }
     using Sigmoid_T = Sigmoid1d<MemorySpace, SigmoidType, EdgeType>;
     using DiagBasisEval_T = BasisEvaluator<BasisHomogeneity::OffdiagHomogeneous, Kokkos::pair<OffdiagEval, Sigmoid_T>, Rectifier>;
@@ -270,10 +275,11 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateSigmoidExpansionTemplate(
             mset_offdiag, mset, centers, edgeWidth);
     }
     MultiIndexSet mset = MultiIndexSet::CreateTotalOrder(inputDim, totalOrder, MultiIndexLimiter::NonzeroDiag());
-    FixedMultiIndexSet<MemorySpace> fmset_diag = mset.Fix(true).ToDevice<MemorySpace>();
-    FixedMultiIndexSet<MemorySpace> fmset_offdiag {inputDim-1, totalOrder};
+    FixedMultiIndexSet<MemorySpace> fmset_diag_d = mset.Fix(true).ToDevice<MemorySpace>();
+    FixedMultiIndexSet<Kokkos::HostSpace> fmset_offdiag {inputDim-1, totalOrder};
+    FixedMultiIndexSet<MemorySpace> fmset_offdiag_d = fmset_offdiag.ToDevice<MemorySpace>();
     return CreateSigmoidExpansionTemplate<MemorySpace, OffdiagEval, Rectifier, SigmoidType, EdgeType>(
-        fmset_offdiag, fmset_diag, centers, edgeWidth);
+        fmset_offdiag_d, fmset_diag_d, centers, edgeWidth);
 }
 
 template<typename MemorySpace>
@@ -283,25 +289,25 @@ void HandleSigmoidComponentErrors(MapOptions const& opts) {
         std::string basisString = MapOptions::btypes[static_cast<unsigned int>(opts.basisType)];
         std::stringstream ss;
         ss << "Unsupported basis type for sigmoid expansion: " << basisString;
-        ProcAgnosticError<MemorySpace, std::invalid_argument>::error(ss.str().c_str());
+        ProcAgnosticError<std::invalid_argument>(ss.str().c_str());
     }
     if(opts.posFuncType != PosFuncTypes::Exp && opts.posFuncType != PosFuncTypes::SoftPlus) {
         std::string posString = MapOptions::pftypes[static_cast<unsigned int>(opts.posFuncType)];
         std::stringstream ss;
         ss << "Unsupported positive function type for sigmoid expansion: " << posString;
-        ProcAgnosticError<MemorySpace, std::invalid_argument>::error(ss.str().c_str());
+        ProcAgnosticError<std::invalid_argument>(ss.str().c_str());
     }
     if(opts.edgeType != EdgeTypes::SoftPlus) {
         std::string edgeString = MapOptions::etypes[static_cast<unsigned int>(opts.edgeType)];
         std::stringstream ss;
         ss << "Unsupported edge type for sigmoid expansion: " << edgeString;
-        ProcAgnosticError<MemorySpace, std::invalid_argument>::error(ss.str().c_str());
+        ProcAgnosticError<std::invalid_argument>(ss.str().c_str());
     }
     if(opts.sigmoidType != SigmoidTypes::Logistic) {
         std::string sigmoidString = MapOptions::stypes[static_cast<unsigned int>(opts.sigmoidType)];
         std::stringstream ss;
         ss << "Unsupported sigmoid type for sigmoid expansion: " << sigmoidString;
-        ProcAgnosticError<MemorySpace, std::invalid_argument>::error(ss.str().c_str());
+        ProcAgnosticError<std::invalid_argument>(ss.str().c_str());
     }
 }
 
@@ -346,7 +352,7 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> MapFactory::CreateSigmoidTriang
     if(outputDim > inputDim) {
         std::stringstream ss;
         ss << "CreateSigmoidTriangular: Output dimension " << outputDim << " cannot be greater than input dimension " << inputDim;
-        ProcAgnosticError<MemorySpace, std::invalid_argument>::error(ss.str().c_str());
+        ProcAgnosticError<std::invalid_argument>(ss.str().c_str());
     }
     std::vector<std::shared_ptr<ConditionalMapBase<MemorySpace>> > comps(outputDim);
     for(int i = 0; i < outputDim; i++) {
@@ -375,4 +381,4 @@ template std::shared_ptr<ConditionalMapBase<Kokkos::HostSpace>> mpart::MapFactor
     template std::shared_ptr<ConditionalMapBase<DeviceSpace>> mpart::MapFactory::CreateSigmoidComponent<DeviceSpace>(unsigned int, unsigned int, StridedVector<const double, DeviceSpace>, MapOptions);
     template std::shared_ptr<ConditionalMapBase<DeviceSpace>> mpart::MapFactory::CreateSigmoidComponent<DeviceSpace>(FixedMultiIndexSet<DeviceSpace>, FixedMultiIndexSet<DeviceSpace>, StridedVector<const double, DeviceSpace>, MapOptions);
     template std::shared_ptr<ConditionalMapBase<DeviceSpace>> mpart::MapFactory::CreateSigmoidTriangular<DeviceSpace>(unsigned int, unsigned int, unsigned int, std::vector<StridedVector<const double, DeviceSpace>> const&, MapOptions);
-#endif
\ No newline at end of file
+#endif
diff --git a/src/MapFactoryImpl1.cpp b/src/MapFactoryImpl1.cpp
index 883d7163..dc6ed1d6 100644
--- a/src/MapFactoryImpl1.cpp
+++ b/src/MapFactoryImpl1.cpp
@@ -26,7 +26,8 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_Phys_ACC(Fi
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*, MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
     return output;
 }
 
@@ -42,8 +43,8 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Exp, Adaptiv
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, SoftPlus, AdaptiveClenshawCurtis, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Exp, AdaptiveClenshawCurtis, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Softplus, AdaptiveClenshawCurtis, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, SoftPlus, AdaptiveClenshawCurtis, mpart::DeviceSpace)
 #endif 
 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory1)
-#endif 
\ No newline at end of file
+#endif 
diff --git a/src/MapFactoryImpl10.cpp b/src/MapFactoryImpl10.cpp
index 89caf963..c7e2e8a8 100644
--- a/src/MapFactoryImpl10.cpp
+++ b/src/MapFactoryImpl10.cpp
@@ -26,7 +26,9 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_LinPhys_ACC
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
@@ -42,7 +44,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::Physici
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::PhysicistHermite>, SoftPlus, AdaptiveClenshawCurtis, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::PhysicistHermite>, Exp, AdaptiveClenshawCurtis, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::PhysicistHermite>, Softplus, AdaptiveClenshawCurtis, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::PhysicistHermite>, SoftPlus, AdaptiveClenshawCurtis, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory10)
-#endif 
\ No newline at end of file
+#endif 
diff --git a/src/MapFactoryImpl11.cpp b/src/MapFactoryImpl11.cpp
index c82bb35d..c4e0299d 100644
--- a/src/MapFactoryImpl11.cpp
+++ b/src/MapFactoryImpl11.cpp
@@ -24,7 +24,9 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_LinPhys_CC(
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
@@ -40,7 +42,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::Physici
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::PhysicistHermite>, SoftPlus, ClenshawCurtisQuadrature, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::PhysicistHermite>, Exp, ClenshawCurtisQuadrature, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::PhysicistHermite>, Softplus, ClenshawCurtisQuadrature, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::PhysicistHermite>, SoftPlus, ClenshawCurtisQuadrature, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory11)
-#endif 
\ No newline at end of file
+#endif 
diff --git a/src/MapFactoryImpl12.cpp b/src/MapFactoryImpl12.cpp
index 03edb83d..8723ce92 100644
--- a/src/MapFactoryImpl12.cpp
+++ b/src/MapFactoryImpl12.cpp
@@ -23,7 +23,9 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_LinPhys_AS(
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
@@ -39,7 +41,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::Physici
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::PhysicistHermite>, SoftPlus, AdaptiveSimpson, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::PhysicistHermite>, Exp, AdaptiveSimpson, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::PhysicistHermite>, Softplus, AdaptiveSimpson, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::PhysicistHermite>, SoftPlus, AdaptiveSimpson, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory12)
 #endif
diff --git a/src/MapFactoryImpl13.cpp b/src/MapFactoryImpl13.cpp
index 793e7728..757d9fb4 100644
--- a/src/MapFactoryImpl13.cpp
+++ b/src/MapFactoryImpl13.cpp
@@ -25,7 +25,9 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_LinProb_ACC
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
@@ -41,7 +43,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::Probabi
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::ProbabilistHermite>, SoftPlus, AdaptiveClenshawCurtis, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::ProbabilistHermite>, Exp, AdaptiveClenshawCurtis, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::ProbabilistHermite>, Softplus, AdaptiveClenshawCurtis, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::ProbabilistHermite>, SoftPlus, AdaptiveClenshawCurtis, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory13)
-#endif 
\ No newline at end of file
+#endif 
diff --git a/src/MapFactoryImpl14.cpp b/src/MapFactoryImpl14.cpp
index d58723c2..794b70b9 100644
--- a/src/MapFactoryImpl14.cpp
+++ b/src/MapFactoryImpl14.cpp
@@ -23,7 +23,9 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_LinProb_CC(
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
@@ -39,7 +41,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::Probabi
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::ProbabilistHermite>, SoftPlus, ClenshawCurtisQuadrature, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::ProbabilistHermite>, Exp, ClenshawCurtisQuadrature, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::ProbabilistHermite>, Softplus, ClenshawCurtisQuadrature, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::ProbabilistHermite>, SoftPlus, ClenshawCurtisQuadrature, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory14)
-#endif 
\ No newline at end of file
+#endif 
diff --git a/src/MapFactoryImpl15.cpp b/src/MapFactoryImpl15.cpp
index d2e43efd..f5ae8513 100644
--- a/src/MapFactoryImpl15.cpp
+++ b/src/MapFactoryImpl15.cpp
@@ -23,7 +23,9 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_LinProb_AS(
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
@@ -39,7 +41,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::Probabi
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::ProbabilistHermite>, SoftPlus, AdaptiveSimpson, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::ProbabilistHermite>, Exp, AdaptiveSimpson, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::ProbabilistHermite>, Softplus, AdaptiveSimpson, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::ProbabilistHermite>, SoftPlus, AdaptiveSimpson, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory15)
-#endif 
\ No newline at end of file
+#endif 
diff --git a/src/MapFactoryImpl16.cpp b/src/MapFactoryImpl16.cpp
index ad34fd4e..40f9c109 100644
--- a/src/MapFactoryImpl16.cpp
+++ b/src/MapFactoryImpl16.cpp
@@ -23,15 +23,17 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_LinHF_AS(Fi
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
 static auto reg_host_linhf_as_exp = mpart::MapFactory::CompFactoryImpl<Kokkos::HostSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::AdaptiveSimpson), CreateComponentImpl_LinHF_AS<Kokkos::HostSpace, Exp>));
 static auto reg_host_linhf_as_splus = mpart::MapFactory::CompFactoryImpl<Kokkos::HostSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::AdaptiveSimpson), CreateComponentImpl_LinHF_AS<Kokkos::HostSpace, SoftPlus>));
 #if defined(MPART_ENABLE_GPU)
-    static auto reg_device_linhf_as_exp = mpart::MapFactory::CompFactoryImpl<DeviceSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::AdaptiveSimpson), CreateComponentImpl_LinHF_AS<Kokkos::HostSpace, Exp>));
-    static auto reg_device_linhf_as_splus = mpart::MapFactory::CompFactoryImpl<DeviceSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::AdaptiveSimpson), CreateComponentImpl_LinHF_AS<Kokkos::HostSpace, SoftPlus>));
+    static auto reg_device_linhf_as_exp = mpart::MapFactory::CompFactoryImpl<DeviceSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::AdaptiveSimpson), CreateComponentImpl_LinHF_AS<mpart::DeviceSpace, Exp>));
+    static auto reg_device_linhf_as_splus = mpart::MapFactory::CompFactoryImpl<DeviceSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::AdaptiveSimpson), CreateComponentImpl_LinHF_AS<mpart::DeviceSpace, SoftPlus>));
 #endif
 
 #if defined(MPART_HAS_CEREAL)
@@ -39,7 +41,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::Hermite
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::HermiteFunction>, SoftPlus, AdaptiveSimpson, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::HermiteFunction>, Exp, AdaptiveSimpson, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::HermiteFunction>, Softplus, AdaptiveSimpson, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::HermiteFunction>, SoftPlus, AdaptiveSimpson, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory16)
-#endif 
\ No newline at end of file
+#endif 
diff --git a/src/MapFactoryImpl17.cpp b/src/MapFactoryImpl17.cpp
index bb3d979c..f14fae97 100644
--- a/src/MapFactoryImpl17.cpp
+++ b/src/MapFactoryImpl17.cpp
@@ -23,15 +23,17 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_LinHF_CC(Fi
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
 static auto reg_host_linhf_cc_exp = mpart::MapFactory::CompFactoryImpl<Kokkos::HostSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::ClenshawCurtis), CreateComponentImpl_LinHF_CC<Kokkos::HostSpace, Exp>));
 static auto reg_host_linhf_cc_splus = mpart::MapFactory::CompFactoryImpl<Kokkos::HostSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::ClenshawCurtis), CreateComponentImpl_LinHF_CC<Kokkos::HostSpace, SoftPlus>));
 #if defined(MPART_ENABLE_GPU)
-    static auto reg_device_linhf_cc_exp = mpart::MapFactory::CompFactoryImpl<DeviceSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::ClenshawCurtis), CreateComponentImpl_LinHF_CC<Kokkos::HostSpace, Exp>));
-    static auto reg_device_linhf_cc_splus = mpart::MapFactory::CompFactoryImpl<DeviceSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::ClenshawCurtis), CreateComponentImpl_LinHF_CC<Kokkos::HostSpace, SoftPlus>));
+    static auto reg_device_linhf_cc_exp = mpart::MapFactory::CompFactoryImpl<DeviceSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::ClenshawCurtis), CreateComponentImpl_LinHF_CC<mpart::DeviceSpace, Exp>));
+    static auto reg_device_linhf_cc_splus = mpart::MapFactory::CompFactoryImpl<DeviceSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::ClenshawCurtis), CreateComponentImpl_LinHF_CC<mpart::DeviceSpace, SoftPlus>));
 #endif
 
 #if defined(MPART_HAS_CEREAL)
@@ -39,7 +41,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::Hermite
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::HermiteFunction>, SoftPlus, ClenshawCurtisQuadrature, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::HermiteFunction>, Exp, ClenshawCurtisQuadrature, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::HermiteFunction>, Softplus, ClenshawCurtisQuadrature, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::HermiteFunction>, SoftPlus, ClenshawCurtisQuadrature, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory17)
-#endif 
\ No newline at end of file
+#endif 
diff --git a/src/MapFactoryImpl18.cpp b/src/MapFactoryImpl18.cpp
index e98007a4..f135404a 100644
--- a/src/MapFactoryImpl18.cpp
+++ b/src/MapFactoryImpl18.cpp
@@ -24,15 +24,17 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_LinHF_ACC(F
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
 static auto reg_host_linhf_acc_exp = mpart::MapFactory::CompFactoryImpl<Kokkos::HostSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::AdaptiveClenshawCurtis), CreateComponentImpl_LinHF_ACC<Kokkos::HostSpace, Exp>));
 static auto reg_host_linhf_acc_splus = mpart::MapFactory::CompFactoryImpl<Kokkos::HostSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::AdaptiveClenshawCurtis), CreateComponentImpl_LinHF_ACC<Kokkos::HostSpace, SoftPlus>));
 #if defined(MPART_ENABLE_GPU)
-    static auto reg_device_linhf_acc_exp = mpart::MapFactory::CompFactoryImpl<DeviceSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::AdaptiveClenshawCurtis), CreateComponentImpl_LinHF_ACC<Kokkos::HostSpace, Exp>));
-    static auto reg_device_linhf_acc_splus = mpart::MapFactory::CompFactoryImpl<DeviceSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::AdaptiveClenshawCurtis), CreateComponentImpl_LinHF_ACC<Kokkos::HostSpace, SoftPlus>));
+    static auto reg_device_linhf_acc_exp = mpart::MapFactory::CompFactoryImpl<DeviceSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::AdaptiveClenshawCurtis), CreateComponentImpl_LinHF_ACC<mpart::DeviceSpace, Exp>));
+    static auto reg_device_linhf_acc_splus = mpart::MapFactory::CompFactoryImpl<DeviceSpace>::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::AdaptiveClenshawCurtis), CreateComponentImpl_LinHF_ACC<mpart::DeviceSpace, SoftPlus>));
 #endif
 
 #if defined(MPART_HAS_CEREAL)
@@ -40,7 +42,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::Hermite
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::HermiteFunction>, SoftPlus, AdaptiveClenshawCurtis, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::HermiteFunction>, Exp, AdaptiveClenshawCurtis, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::HermiteFunction>, Softplus, AdaptiveClenshawCurtis, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis<mpart::HermiteFunction>, SoftPlus, AdaptiveClenshawCurtis, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory18)
-#endif 
\ No newline at end of file
+#endif 
diff --git a/src/MapFactoryImpl2.cpp b/src/MapFactoryImpl2.cpp
index b6b23fb6..67ebc2bc 100644
--- a/src/MapFactoryImpl2.cpp
+++ b/src/MapFactoryImpl2.cpp
@@ -22,7 +22,8 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_Phys_CC(Fix
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
     return output;
 }
 
@@ -38,7 +39,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Exp, Clensha
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, SoftPlus, ClenshawCurtisQuadrature, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Exp, ClenshawCurtisQuadrature, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Softplus, ClenshawCurtisQuadrature, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, SoftPlus, ClenshawCurtisQuadrature, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory2)
-#endif 
\ No newline at end of file
+#endif 
diff --git a/src/MapFactoryImpl3.cpp b/src/MapFactoryImpl3.cpp
index 7bac8fb4..b7b19289 100644
--- a/src/MapFactoryImpl3.cpp
+++ b/src/MapFactoryImpl3.cpp
@@ -21,7 +21,9 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_Phys_AS(Fix
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
@@ -37,7 +39,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Exp, Adaptiv
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, SoftPlus, AdaptiveSimpson, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Exp, AdaptiveSimpson, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Softplus, AdaptiveSimpson, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, SoftPlus, AdaptiveSimpson, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory3)
-#endif 
\ No newline at end of file
+#endif 
diff --git a/src/MapFactoryImpl4.cpp b/src/MapFactoryImpl4.cpp
index eabfb21a..e1f8edc7 100644
--- a/src/MapFactoryImpl4.cpp
+++ b/src/MapFactoryImpl4.cpp
@@ -22,8 +22,10 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_Prob_ACC(Fi
     std::shared_ptr<ConditionalMapBase<MemorySpace>> output;
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
+    
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
     return output;
 }
 
@@ -39,7 +41,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Exp, Adapt
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, SoftPlus, AdaptiveClenshawCurtis, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Exp, AdaptiveClenshawCurtis, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Softplus, AdaptiveClenshawCurtis, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, SoftPlus, AdaptiveClenshawCurtis, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory4)
-#endif 
\ No newline at end of file
+#endif 
diff --git a/src/MapFactoryImpl5.cpp b/src/MapFactoryImpl5.cpp
index 2eaa8113..4bc9a03a 100644
--- a/src/MapFactoryImpl5.cpp
+++ b/src/MapFactoryImpl5.cpp
@@ -21,7 +21,9 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_Prob_CC(Fix
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
@@ -37,7 +39,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Exp, Clens
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, SoftPlus, ClenshawCurtisQuadrature, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Exp, ClenshawCurtisQuadrature, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Softplus, ClenshawCurtisQuadrature, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, SoftPlus, ClenshawCurtisQuadrature, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory5)
-#endif
\ No newline at end of file
+#endif
diff --git a/src/MapFactoryImpl6.cpp b/src/MapFactoryImpl6.cpp
index 63348840..0e51786e 100644
--- a/src/MapFactoryImpl6.cpp
+++ b/src/MapFactoryImpl6.cpp
@@ -21,7 +21,9 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_Prob_AS(Fix
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
@@ -38,7 +40,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Exp, Adapt
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, SoftPlus, AdaptiveSimpson, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Exp, AdaptiveSimpson, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Softplus, AdaptiveSimpson, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, SoftPlus, AdaptiveSimpson, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory6)
-#endif
\ No newline at end of file
+#endif
diff --git a/src/MapFactoryImpl7.cpp b/src/MapFactoryImpl7.cpp
index 93463d21..6a29d336 100644
--- a/src/MapFactoryImpl7.cpp
+++ b/src/MapFactoryImpl7.cpp
@@ -23,7 +23,9 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_HF_ACC(Fixe
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
@@ -40,7 +42,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Exp, Adaptive
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, SoftPlus, AdaptiveClenshawCurtis, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Exp, AdaptiveClenshawCurtis, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Softplus, AdaptiveClenshawCurtis, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, SoftPlus, AdaptiveClenshawCurtis, mpart::DeviceSpace)
 #endif
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory7) 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/MapFactoryImpl8.cpp b/src/MapFactoryImpl8.cpp
index 69ab2725..04a91cc5 100644
--- a/src/MapFactoryImpl8.cpp
+++ b/src/MapFactoryImpl8.cpp
@@ -21,7 +21,9 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_HF_CC(Fixed
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
@@ -37,7 +39,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Exp, Clenshaw
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, SoftPlus, ClenshawCurtisQuadrature, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Exp, ClenshawCurtisQuadrature, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Softplus, ClenshawCurtisQuadrature, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, SoftPlus, ClenshawCurtisQuadrature, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory8)
-#endif
\ No newline at end of file
+#endif
diff --git a/src/MapFactoryImpl9.cpp b/src/MapFactoryImpl9.cpp
index 12d7316a..11313bfc 100644
--- a/src/MapFactoryImpl9.cpp
+++ b/src/MapFactoryImpl9.cpp
@@ -21,7 +21,9 @@ std::shared_ptr<ConditionalMapBase<MemorySpace>> CreateComponentImpl_HF_AS(Fixed
 
     output = std::make_shared<MonotoneComponent<decltype(expansion), PosFuncType, decltype(quad), MemorySpace>>(expansion, quad, opts.contDeriv, opts.nugget);
 
-    output->SetCoeffs(Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size()));
+    Kokkos::View<const double*,MemorySpace> coeffs = Kokkos::View<double*,MemorySpace>("Component Coefficients", mset.Size());
+    output->SetCoeffs(coeffs);
+
     return output;
 }
 
@@ -37,7 +39,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Exp, Adaptive
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, SoftPlus, AdaptiveSimpson, Kokkos::HostSpace)
 #if defined(MPART_ENABLE_GPU)
 REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Exp, AdaptiveSimpson, mpart::DeviceSpace)
-REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Softplus, AdaptiveSimpson, mpart::DeviceSpace)
+REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, SoftPlus, AdaptiveSimpson, mpart::DeviceSpace)
 #endif 
 CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory9)
-#endif
\ No newline at end of file
+#endif
diff --git a/src/MapObjective.cpp b/src/MapObjective.cpp
index d46f1737..50431688 100644
--- a/src/MapObjective.cpp
+++ b/src/MapObjective.cpp
@@ -1,10 +1,11 @@
 #include "MParT/MapObjective.h"
+
 using namespace mpart;
 
 template<typename MemorySpace>
 double MapObjective<MemorySpace>::operator()(unsigned int n, const double* coeffs, double* grad, std::shared_ptr<ConditionalMapBase<MemorySpace>> map) {
 
-    StridedVector<const double, MemorySpace> coeffView = ToConstKokkos<double,MemorySpace>(coeffs, n);
+    Kokkos::View<const double*, MemorySpace> coeffView = ToConstKokkos<double,MemorySpace>(coeffs, n);
     StridedVector<double, MemorySpace> gradView = ToKokkos<double,MemorySpace>(grad, n);
     map->SetCoeffs(coeffView);
     return ObjectivePlusCoeffGradImpl(train_, gradView, map);
@@ -51,14 +52,16 @@ std::shared_ptr<MapObjective<MemorySpace>> ObjectiveFactory::CreateGaussianKLObj
 
 template<typename MemorySpace>
 double KLObjective<MemorySpace>::ObjectivePlusCoeffGradImpl(StridedMatrix<const double, MemorySpace> data, StridedVector<double, MemorySpace> grad, std::shared_ptr<ConditionalMapBase<MemorySpace>> map) const {
+    using ExecSpace = typename MemoryToExecution<MemorySpace>::Space;
     unsigned int N_samps = data.extent(1);
     unsigned int grad_dim = grad.extent(0);
     PullbackDensity<MemorySpace> pullback {map, density_};
     StridedVector<double, MemorySpace> densityX = pullback.LogDensity(data);
     StridedMatrix<double, MemorySpace> densityGradX = pullback.LogDensityCoeffGrad(data);
     double sumDensity = 0.;
-    
-    Kokkos::parallel_reduce ("Sum Negative Log Likelihood", N_samps, KOKKOS_LAMBDA (const int i, double &sum) {
+   
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy(0,N_samps); 
+    Kokkos::parallel_reduce ("Sum Negative Log Likelihood", policy, KOKKOS_LAMBDA (const int i, double &sum) {
         sum -= densityX(i);
     }, sumDensity);
 
@@ -66,21 +69,22 @@ double KLObjective<MemorySpace>::ObjectivePlusCoeffGradImpl(StridedMatrix<const
     if (grad.data()!=nullptr){
         double scale = -1.0/((double) N_samps);
 
-        Kokkos::TeamPolicy<MemoryToExecution<MemorySpace>> policy(grad_dim, Kokkos::AUTO());
-
+        Kokkos::TeamPolicy<ExecSpace> policy(grad_dim, Kokkos::AUTO());
+        using team_handle = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
         Kokkos::parallel_for(policy,
-            KOKKOS_LAMBDA(auto& tag, auto&& teamMember){
+            KOKKOS_LAMBDA(const team_handle& teamMember){
                 int row = teamMember.league_rank();
                 double thisRowSum = 0.0;
                 Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, N_samps), 
-                    KOKKOS_LAMBDA(int col, double& innerUpdate){
-                        innerUpdate += scale*densityGradX(row,col);
-                    }, 
+                [&] (const int& col, double& innerUpdate) {
+                    innerUpdate += scale*densityGradX(row,col);
+                },
                 thisRowSum);
 
                 grad(row) = thisRowSum;
             }
         );
+        Kokkos::fence();
     }
     return sumDensity/N_samps;
 }
@@ -91,7 +95,8 @@ double KLObjective<MemorySpace>::ObjectiveImpl(StridedMatrix<const double, Memor
     PullbackDensity<MemorySpace> pullback {map, density_};
     StridedVector<double, MemorySpace> densityX = pullback.LogDensity(data);
     double sumDensity = 0.;
-    Kokkos::parallel_reduce ("Sum Negative Log Likelihood", N_samps, KOKKOS_LAMBDA (const int i, double &sum) {
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy(0,N_samps); 
+    Kokkos::parallel_reduce ("Sum Negative Log Likelihood", policy, KOKKOS_LAMBDA (const int i, double &sum) {
         sum -= densityX(i);
     }, sumDensity);
     return sumDensity/N_samps;
@@ -99,22 +104,28 @@ double KLObjective<MemorySpace>::ObjectiveImpl(StridedMatrix<const double, Memor
 
 template<typename MemorySpace>
 void KLObjective<MemorySpace>::CoeffGradImpl(StridedMatrix<const double, MemorySpace> data, StridedVector<double, MemorySpace> grad, std::shared_ptr<ConditionalMapBase<MemorySpace>> map) const {
+    using ExecSpace = typename MemoryToExecution<MemorySpace>::Space;
     unsigned int N_samps = data.extent(1);
     unsigned int grad_dim = grad.extent(0);
     PullbackDensity<MemorySpace> pullback {map, density_};
     StridedMatrix<double, MemorySpace> densityGradX = pullback.LogDensityCoeffGrad(data);
 
     double scale = -1.0/((double) N_samps);
-    Kokkos::parallel_for("grad", Kokkos::TeamPolicy<MemoryToExecution<MemorySpace>>(grad_dim, Kokkos::AUTO()),
-        KOKKOS_LAMBDA(auto t, typename Kokkos::TeamPolicy<MemoryToExecution<MemorySpace>>::member_type teamMember){
+    Kokkos::TeamPolicy<ExecSpace> policy(grad_dim, Kokkos::AUTO());
+    using team_handle = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
+    Kokkos::parallel_for(policy,
+        KOKKOS_LAMBDA(const team_handle& teamMember){
             int row = teamMember.league_rank();
             double thisRowSum = 0.0;
-            Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, N_samps), KOKKOS_LAMBDA(int col, double& innerUpdate){
-              innerUpdate += scale*densityGradX(row,col);
-            }, thisRowSum);
+            Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, N_samps), 
+            [&] (const int& col, double& innerUpdate) {
+                innerUpdate += scale*densityGradX(row,col);
+            },
+            thisRowSum);
             grad(row) = thisRowSum;
         }
     );
+    Kokkos::fence();
 }
 
 // Explicit template instantiation
@@ -127,4 +138,4 @@ template std::shared_ptr<MapObjective<Kokkos::HostSpace>> mpart::ObjectiveFactor
     template class mpart::KLObjective<DeviceSpace>;
     template std::shared_ptr<MapObjective<DeviceSpace>> mpart::ObjectiveFactory::CreateGaussianKLObjective<DeviceSpace>(StridedMatrix<const double, DeviceSpace>, unsigned int);
     template std::shared_ptr<MapObjective<DeviceSpace>> mpart::ObjectiveFactory::CreateGaussianKLObjective<DeviceSpace>(StridedMatrix<const double, DeviceSpace>, StridedMatrix<const double, DeviceSpace>, unsigned int);
-#endif
\ No newline at end of file
+#endif
diff --git a/src/MultiIndices/FixedMultiIndexSet.cpp b/src/MultiIndices/FixedMultiIndexSet.cpp
index a96cb3c1..9a27a184 100644
--- a/src/MultiIndices/FixedMultiIndexSet.cpp
+++ b/src/MultiIndices/FixedMultiIndexSet.cpp
@@ -2,6 +2,9 @@
 #include "MParT/MultiIndices/MultiIndex.h"
 #include "MParT/MultiIndices/MultiIndexSet.h"
 #include "MParT/Utilities/ArrayConversions.h"
+#include "MParT/Utilities/KokkosSpaceMappings.h"
+#include "MParT/Utilities/GPUtils.h"
+
 #include <stdio.h>
 
 using namespace mpart;
@@ -126,8 +129,10 @@ void FixedMultiIndexSet<MemorySpace>::SetupTerms()
     unsigned int numTerms = nzOrders.extent(0) / dim;
 
     nzStarts = Kokkos::View<unsigned int*, MemorySpace>("Start of a Multiindex", numTerms+1);
-    Kokkos::parallel_for(numTerms, StartSetter<MemorySpace>(nzStarts,dim));
-    Kokkos::parallel_for(dim*numTerms, DimSetter<MemorySpace>(nzDims,dim));
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy(0, numTerms);
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policyDims(0, dim*numTerms);
+    Kokkos::parallel_for(policy, StartSetter<MemorySpace>(nzStarts,dim));
+    Kokkos::parallel_for(policyDims, DimSetter<MemorySpace>(nzDims,dim));
 }
 template<>
 void FixedMultiIndexSet<Kokkos::HostSpace>::SetupTerms()
@@ -153,9 +158,11 @@ template<typename MemorySpace>
 void FixedMultiIndexSet<MemorySpace>::CalculateMaxDegrees()
 {
     maxDegrees = Kokkos::View<unsigned int*, MemorySpace>("Maximum degrees", dim);
-
-    Kokkos::parallel_for(dim, MaxDegreeInitializer<MemorySpace>(maxDegrees));
-    Kokkos::parallel_for(nzOrders.extent(0), MaxDegreeSetter<MemorySpace>(maxDegrees, nzDims, nzOrders, dim));
+    
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> DimPolicy(0, dim); 
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> NZPolicy(0, nzOrders.extent(0));
+    Kokkos::parallel_for(DimPolicy, MaxDegreeInitializer<MemorySpace>(maxDegrees));
+    Kokkos::parallel_for(NZPolicy, MaxDegreeSetter<MemorySpace>(maxDegrees, nzDims, nzOrders, dim));
 }
 
 template<>
@@ -186,8 +193,11 @@ FixedMultiIndexSet<MemorySpace>::FixedMultiIndexSet(unsigned int
                                                                                 nzDims(nzDims),
                                                                                 nzOrders(nzOrders)
 {
+    DimensionSorter<MemorySpace> dimSort {nzStarts, nzDims, nzOrders};
     // Sort so that nzDims increases for each multiindex
-    Kokkos::parallel_for(nzStarts.extent(0)-1, DimensionSorter<MemorySpace>(nzStarts, nzDims, nzOrders));
+    unsigned int N_midxs = nzStarts.extent(0) - 1;
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy{0lu, N_midxs};
+    Kokkos::parallel_for(policy, dimSort);
 
     CalculateMaxDegrees();
 }
@@ -456,18 +466,18 @@ FixedMultiIndexSet<Kokkos::HostSpace> FixedMultiIndexSet<Kokkos::HostSpace>::ToD
 #if defined(MPART_ENABLE_GPU)
     template<>
     template<>
-    FixedMultiIndexSet<Kokkos::DefaultExecutionSpace::memory_space> FixedMultiIndexSet<Kokkos::HostSpace>::ToDevice<Kokkos::DefaultExecutionSpace::memory_space>()
+    FixedMultiIndexSet<DeviceSpace> FixedMultiIndexSet<Kokkos::HostSpace>::ToDevice<DeviceSpace>()
     {
-        auto deviceStarts = mpart::ToDevice<Kokkos::DefaultExecutionSpace::memory_space>(nzStarts);
-        auto deviceDims = mpart::ToDevice<Kokkos::DefaultExecutionSpace::memory_space>(nzDims);
-        auto deviceOrders =  mpart::ToDevice<Kokkos::DefaultExecutionSpace::memory_space>(nzOrders);
-        FixedMultiIndexSet<Kokkos::DefaultExecutionSpace::memory_space> output(dim, deviceStarts, deviceDims, deviceOrders);
+        auto deviceStarts = mpart::ToDevice<DeviceSpace>(nzStarts);
+        auto deviceDims = mpart::ToDevice<DeviceSpace>(nzDims);
+        auto deviceOrders =  mpart::ToDevice<DeviceSpace>(nzOrders);
+        FixedMultiIndexSet<DeviceSpace> output(dim, deviceStarts, deviceDims, deviceOrders);
         return output;
     }
 
     template<>
     template<>
-    FixedMultiIndexSet<Kokkos::HostSpace> FixedMultiIndexSet<Kokkos::DefaultExecutionSpace::memory_space>::ToDevice<Kokkos::HostSpace>()
+    FixedMultiIndexSet<Kokkos::HostSpace> FixedMultiIndexSet<DeviceSpace>::ToDevice<Kokkos::HostSpace>()
     {
         assert(false);
         return FixedMultiIndexSet<Kokkos::HostSpace>(0,0);
@@ -475,10 +485,9 @@ FixedMultiIndexSet<Kokkos::HostSpace> FixedMultiIndexSet<Kokkos::HostSpace>::ToD
 
     template<>
     template<>
-    FixedMultiIndexSet<Kokkos::DefaultExecutionSpace::memory_space> FixedMultiIndexSet<Kokkos::DefaultExecutionSpace::memory_space>::ToDevice<Kokkos::DefaultExecutionSpace::memory_space>()
+    FixedMultiIndexSet<DeviceSpace> FixedMultiIndexSet<DeviceSpace>::ToDevice<DeviceSpace>()
     {
-        assert(false);
-        return FixedMultiIndexSet<Kokkos::DefaultExecutionSpace::memory_space>(0,0);
+        return *this;
     }
 
 #endif
@@ -487,7 +496,7 @@ FixedMultiIndexSet<Kokkos::HostSpace> FixedMultiIndexSet<Kokkos::HostSpace>::ToD
 // Explicit template instantiation
 #if defined(MPART_ENABLE_GPU)
     template class mpart::FixedMultiIndexSet<Kokkos::HostSpace>;
-    template class mpart::FixedMultiIndexSet<Kokkos::DefaultExecutionSpace::memory_space>;
+    template class mpart::FixedMultiIndexSet<DeviceSpace>;
 #else
     template class mpart::FixedMultiIndexSet<Kokkos::HostSpace>;
 #endif
diff --git a/src/ParameterizedFunctionBase.cpp b/src/ParameterizedFunctionBase.cpp
index 8887ccf6..f38b4ff9 100644
--- a/src/ParameterizedFunctionBase.cpp
+++ b/src/ParameterizedFunctionBase.cpp
@@ -181,31 +181,42 @@ Eigen::RowMatrixXd ParameterizedFunctionBase<mpart::DeviceSpace>::Gradient(Eigen
 
 #endif
 
-
-
-template<typename MemorySpace>
-void ParameterizedFunctionBase<MemorySpace>::SetCoeffs(Kokkos::View<const double*, MemorySpace> coeffs){
-
+template<typename MemorySpaceSrc, typename MemorySpaceDest>
+void SetCoeffsInternal(unsigned int numCoeffs,
+        Kokkos::View<double*,MemorySpaceDest>& coeffsDest,
+        Kokkos::View<const double*,MemorySpaceSrc> coeffsSrc) {
     // If coefficients already exist, make sure the sizes match
-    if(this->savedCoeffs.is_allocated()){
-        if(coeffs.size() != numCoeffs){
+    if(coeffsDest.is_allocated()){
+        if(coeffsSrc.size() != numCoeffs){
             std::stringstream msg;
-            msg << "Error in ParameterizedFunctionBase<MemorySpace>::SetCoeffs.  Expected coefficient vector with size " << numCoeffs << ", but new coefficients have size " << coeffs.size() << ".";
+            msg << "Error in ParameterizedFunctionBase<MemorySpace>::SetCoeffs.  Expected coefficient vector with size " << numCoeffs << ", but new coefficients have size " << coeffsSrc.size() << ".";
             throw std::invalid_argument(msg.str());
         }
 
-        if(this->savedCoeffs.size() != numCoeffs)
-            Kokkos::resize(this->savedCoeffs, numCoeffs);
+        if(coeffsDest.size() != numCoeffs)
+            Kokkos::resize(coeffsDest, numCoeffs);
     }else{
-
-        this->savedCoeffs = Kokkos::View<double*, MemorySpace>("ParameterizedFunctionBase<MemorySpace> Coefficients", coeffs.size());
+        coeffsDest = Kokkos::View<double*, MemorySpaceDest>("ParameterizedFunctionBase<MemorySpace> Coefficients", numCoeffs);
     }
 
-    Kokkos::deep_copy(this->savedCoeffs, coeffs);
+    Kokkos::deep_copy(coeffsDest, coeffsSrc);
+}
+
+template<typename MemorySpace>
+void ParameterizedFunctionBase<MemorySpace>::SetCoeffs(Kokkos::View<const double*, MemorySpace> coeffs){
+    SetCoeffsInternal(this->numCoeffs, this->savedCoeffs, coeffs);
+}
+
+template<typename MemorySpace>
+void ParameterizedFunctionBase<MemorySpace>::SetCoeffs(Kokkos::View<double*, MemorySpace> coeffs){
+    Kokkos::View<const double*, MemorySpace> ConstCoeffs = coeffs;
+    SetCoeffs(ConstCoeffs);
 }
 
+
+
 template<typename MemorySpace>
-void ParameterizedFunctionBase<MemorySpace>::WrapCoeffs(Kokkos::View<double*, Kokkos::HostSpace> coeffs){
+void ParameterizedFunctionBase<MemorySpace>::WrapCoeffs(Kokkos::View<double*, MemorySpace> coeffs){
 
     if(coeffs.size() != numCoeffs){
         std::stringstream msg;
@@ -213,37 +224,43 @@ void ParameterizedFunctionBase<MemorySpace>::WrapCoeffs(Kokkos::View<double*, Ko
         throw std::invalid_argument(msg.str());
     }
     this->savedCoeffs = coeffs;
+
 }
 
 #if defined(MPART_ENABLE_GPU)
-
 template<>
 void ParameterizedFunctionBase<mpart::DeviceSpace>::SetCoeffs(Kokkos::View<const double*, Kokkos::HostSpace> coeffs)
 {
-    // Copy the coefficients to the device
-    Kokkos::View<double*, mpart::DeviceSpace> coeffs_device = ToDevice<mpart::DeviceSpace>(coeffs);
-    this->SetCoeffs(coeffs_device);
+    SetCoeffsInternal(this->numCoeffs, this->savedCoeffs, coeffs);
+}
+template<>
+void ParameterizedFunctionBase<Kokkos::HostSpace>::SetCoeffs(Kokkos::View<const double*, mpart::DeviceSpace> coeffs)
+{
+    SetCoeffsInternal(this->numCoeffs, this->savedCoeffs, coeffs);
 }
 
-
-template<typename MemorySpace>
-void ParameterizedFunctionBase<MemorySpace>::WrapCoeffs(Kokkos::View<double*, mpart::DeviceSpace> coeffs)
+template<>
+void ParameterizedFunctionBase<mpart::DeviceSpace>::SetCoeffs(Kokkos::View<double*, Kokkos::HostSpace> coeffs)
 {
+    Kokkos::View<const double*, Kokkos::HostSpace> ConstCoeffs = coeffs;
+    SetCoeffs(ConstCoeffs);
+}
+template<>
+void ParameterizedFunctionBase<Kokkos::HostSpace>::SetCoeffs(Kokkos::View<double*, mpart::DeviceSpace> coeffs)
+{   
+    Kokkos::View<const double*, mpart::DeviceSpace> ConstCoeffs = coeffs;
+    SetCoeffs(ConstCoeffs);
+}
+
 
-    if(coeffs.size() != numCoeffs){
-        std::stringstream msg;
-        msg << "Error in ParameterizedFunctionBase<MemorySpace>::WrapCoeffs.  Expected coefficient vector with size " << numCoeffs << ", but new coefficients have size " << coeffs.size() << ".";
-        throw std::invalid_argument(msg.str());
-    }
 
-    this->savedCoeffs = coeffs;
-}
 #endif
 
 template<typename MemorySpace>
 void ParameterizedFunctionBase<MemorySpace>::SetCoeffs(Eigen::Ref<Eigen::VectorXd> coeffs)
 {
-     SetCoeffs(Kokkos::View<double*,MemorySpace>(VecToKokkos<double,MemorySpace>(coeffs)));
+    Kokkos::View<const double*, MemorySpace> inputCoeffs = VecToKokkos<double, MemorySpace>(coeffs);
+    SetCoeffs(inputCoeffs);
 }
 
 template<typename MemorySpace>
@@ -254,7 +271,7 @@ void ParameterizedFunctionBase<MemorySpace>::WrapCoeffs(Eigen::Ref<Eigen::Vector
 template<>
 void ParameterizedFunctionBase<Kokkos::HostSpace>::WrapCoeffs(Eigen::Ref<Eigen::VectorXd> coeffs)
 {
-     WrapCoeffs(Kokkos::View<double*,Kokkos::HostSpace>(VecToKokkos<double,Kokkos::HostSpace>(coeffs)));
+    WrapCoeffs(Kokkos::View<double*, Kokkos::HostSpace>(VecToKokkos<double, Kokkos::HostSpace>(coeffs)));
 }
 
 template<>
@@ -372,4 +389,4 @@ template class mpart::ParameterizedFunctionBase<Kokkos::HostSpace>;
     }
 
     template class mpart::ParameterizedFunctionBase<DeviceSpace>;
-#endif
\ No newline at end of file
+#endif
diff --git a/src/SummarizedMap.cpp b/src/SummarizedMap.cpp
index 05bf6322..3bfcf9c9 100644
--- a/src/SummarizedMap.cpp
+++ b/src/SummarizedMap.cpp
@@ -27,7 +27,7 @@ SummarizedMap<MemorySpace>::SummarizedMap(std::shared_ptr<ParameterizedFunctionB
 }
 
 template<typename MemorySpace>
-void SummarizedMap<MemorySpace>::SetCoeffs(Kokkos::View<const double*, Kokkos::HostSpace> coeffs)
+void SummarizedMap<MemorySpace>::SetCoeffs(Kokkos::View<const double*, MemorySpace> coeffs)
 {
     // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable
     ConditionalMapBase<MemorySpace>::SetCoeffs(coeffs);
@@ -35,7 +35,7 @@ void SummarizedMap<MemorySpace>::SetCoeffs(Kokkos::View<const double*, Kokkos::H
 }
 
 template<typename MemorySpace>
-void SummarizedMap<MemorySpace>::WrapCoeffs(Kokkos::View<double*, Kokkos::HostSpace> coeffs)
+void SummarizedMap<MemorySpace>::WrapCoeffs(Kokkos::View<double*, MemorySpace> coeffs)
 {
 
     // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable
@@ -45,23 +45,24 @@ void SummarizedMap<MemorySpace>::WrapCoeffs(Kokkos::View<double*, Kokkos::HostSp
 }
 
 #if defined(MPART_ENABLE_GPU)
-template<typename MemorySpace>
-void SummarizedMap<MemorySpace>::SetCoeffs(Kokkos::View<const double*, Kokkos::DefaultExecutionSpace::memory_space> coeffs)
+template<>
+void SummarizedMap<Kokkos::HostSpace>::SetCoeffs(Kokkos::View<const double*, Kokkos::DefaultExecutionSpace::memory_space> coeffs)
 {
 
         // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable
-    ConditionalMapBase<MemorySpace>::SetCoeffs(coeffs);
-    comp_->WrapCoeffs(coeffs);
+    ConditionalMapBase<Kokkos::HostSpace>::SetCoeffs(coeffs);
+    comp_->WrapCoeffs(this->savedCoeffs);
 
 }
+template<>
+void SummarizedMap<mpart::DeviceSpace>::SetCoeffs(Kokkos::View<const double*, Kokkos::HostSpace> coeffs)
 
-template<typename MemorySpace>
-void SummarizedMap<MemorySpace>::WrapCoeffs(Kokkos::View<double*, Kokkos::DefaultExecutionSpace::memory_space> coeffs)
 {
-    // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable
-    ConditionalMapBase<MemorySpace>::WrapCoeffs(coeffs);
 
-    comp_->WrapCoeffs(coeffs);
+        // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable
+    ConditionalMapBase<mpart::DeviceSpace>::SetCoeffs(coeffs);
+    comp_->WrapCoeffs(this->savedCoeffs);
+
 }
 #endif
 
@@ -240,4 +241,4 @@ void SummarizedMap<MemorySpace>::LogDeterminantInputGradImpl(StridedMatrix<const
 template class mpart::SummarizedMap<Kokkos::HostSpace>;
 #if defined(MPART_ENABLE_GPU)
     template class mpart::SummarizedMap<DeviceSpace>;
-#endif
\ No newline at end of file
+#endif
diff --git a/src/TrainMap.cpp b/src/TrainMap.cpp
index db177a0f..af74a58b 100644
--- a/src/TrainMap.cpp
+++ b/src/TrainMap.cpp
@@ -63,10 +63,12 @@ double mpart::TrainMap(std::shared_ptr<ConditionalMapBase<Kokkos::HostSpace>> ma
             std::cout << "TrainMap: Initializing map coeffs to 1." << std::endl;
         }
         Kokkos::View<double*, Kokkos::HostSpace> coeffs ("Default coeffs", map->numCoeffs);
-        Kokkos::parallel_for("Setting default coeff val", map->numCoeffs, KOKKOS_LAMBDA(const unsigned int i){
+	Kokkos::RangePolicy<typename MemoryToExecution<Kokkos::HostSpace>::Space> policy(0,map->numCoeffs);
+        Kokkos::parallel_for("Setting default coeff val", policy, KOKKOS_LAMBDA(const unsigned int i){
             coeffs(i) = 1.;
         });
-        map->SetCoeffs(coeffs);
+	Kokkos::View<const double*, Kokkos::HostSpace> constCoeffs = coeffs;
+        map->SetCoeffs(constCoeffs);
     }
     nlopt::opt opt = SetupOptimization(map->numCoeffs, options);
 
@@ -83,7 +85,7 @@ double mpart::TrainMap(std::shared_ptr<ConditionalMapBase<Kokkos::HostSpace>> ma
     nlopt::result res = opt.optimize(mapCoeffsStd, error);
 
     // Set the coefficients using SetCoeffs
-    Kokkos::View<double*, Kokkos::HostSpace> mapCoeffsView = VecToKokkos<double,Kokkos::HostSpace>(mapCoeffsStd);
+    Kokkos::View<const double*, Kokkos::HostSpace> mapCoeffsView = VecToKokkos<double,Kokkos::HostSpace>(mapCoeffsStd);
     map->SetCoeffs(mapCoeffsView);
 
     // Print a warning if something goes wrong with NLOpt
diff --git a/src/Utilities/LinearAlgebra.cpp b/src/Utilities/LinearAlgebra.cpp
index 157cf9ab..9543ab96 100644
--- a/src/Utilities/LinearAlgebra.cpp
+++ b/src/Utilities/LinearAlgebra.cpp
@@ -295,7 +295,8 @@ double PartialPivLU<mpart::DeviceSpace>::determinant() const
 {
     assert(isComputed);
     double det = 1.0;
-    Kokkos::parallel_reduce(LU_.extent(0), KOKKOS_CLASS_LAMBDA (const int& i, double& lprod) {
+    Kokkos::RangePolicy<typename MemoryToExecution<mpart::DeviceSpace>::Space> policy(0, LU_.extent(0));
+    Kokkos::parallel_reduce(policy, KOKKOS_CLASS_LAMBDA (const int& i, double& lprod) {
         lprod *= LU_(i,i);
     },Kokkos::Prod<double>(det));
 
@@ -473,7 +474,8 @@ double Cholesky<mpart::DeviceSpace>::determinant() const
 {
     assert(isComputed);
     double det = 1.0;
-    Kokkos::parallel_reduce(LLT_.extent(0), KOKKOS_CLASS_LAMBDA (const int& i, double& lprod) {
+    Kokkos::RangePolicy<typename MemoryToExecution<mpart::DeviceSpace>::Space> policy(0, LLT_.extent(0));
+    Kokkos::parallel_reduce(policy, KOKKOS_CLASS_LAMBDA (const int& i, double& lprod) {
         lprod *= LLT_(i,i);
     },Kokkos::Prod<double>(det));
     return det*det;
@@ -485,4 +487,4 @@ template struct mpart::Cholesky<mpart::DeviceSpace>;
 #endif
 
 template struct mpart::PartialPivLU<Kokkos::HostSpace>;
-template struct mpart::Cholesky<Kokkos::HostSpace>;
\ No newline at end of file
+template struct mpart::Cholesky<Kokkos::HostSpace>;
diff --git a/tests/Distributions/Test_DensityBase.cpp b/tests/Distributions/Test_DensityBase.cpp
index 977966e4..94c8e680 100644
--- a/tests/Distributions/Test_DensityBase.cpp
+++ b/tests/Distributions/Test_DensityBase.cpp
@@ -27,10 +27,7 @@ TEST_CASE( "Testing Custom Uniform Density", "[UniformDensity]" ) {
 
     SECTION("LogDensityInputGradImpl") {
         Kokkos::View<double**, Kokkos::HostSpace> output ("output", 2, N_pts);
-        Kokkos::parallel_for( "initialize output", N_pts, KOKKOS_LAMBDA (const int& j) {
-            output(0,j) = -3.;
-            output(1,j) = -3.;
-        });
+        Kokkos::deep_copy(output, -3.);
         density->LogDensityInputGradImpl(pts, output);
         for(unsigned int j = 0; j < N_pts; ++j) {
             REQUIRE(output(0,j) == Approx(0.));
diff --git a/tests/Distributions/Test_Distributions_Common.h b/tests/Distributions/Test_Distributions_Common.h
index eb98ba1d..36d40e38 100644
--- a/tests/Distributions/Test_Distributions_Common.h
+++ b/tests/Distributions/Test_Distributions_Common.h
@@ -23,7 +23,7 @@ UniformSampler(int dim, double scale = std::exp(1.)): SampleGenerator<MemorySpac
 
 void SampleImpl(StridedMatrix<double, MemorySpace> output) {
     Kokkos::MDRangePolicy<Kokkos::Rank<2>,typename MemoryToExecution<MemorySpace>::Space> policy({0, 0}, {output.extent(0), output.extent(1)});
-    Kokkos::parallel_for(policy, KOKKOS_LAMBDA(int i, int j) {
+    Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(int i, int j) {
         auto rgen = this->rand_pool.get_state();
         output(i,j) = scale_*rgen.drand();
         this->rand_pool.free_state(rgen);
@@ -42,7 +42,8 @@ UniformDensity(int dim): DensityBase<MemorySpace>(dim) {}
 void LogDensityImpl(StridedMatrix<const double, MemorySpace> const &pts, StridedVector<double, MemorySpace> output) override {
     double euler = std::exp(1.0);
     unsigned int N = pts.extent(1);
-    Kokkos::parallel_for( "uniform log density", N, KOKKOS_LAMBDA (const int& j) {
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy(0, N);
+    Kokkos::parallel_for( "uniform log density", policy, KOKKOS_CLASS_LAMBDA (const int& j) {
         bool in_bounds1 = (pts(0, j) >= 0.0) && (pts(0, j) <= euler);
         bool in_bounds2 = (pts(1, j) >= 0.0) && (pts(1, j) <= euler);
         output(j) = in_bounds1 && in_bounds2 ? -2 : -std::numeric_limits<double>::infinity();
@@ -51,7 +52,8 @@ void LogDensityImpl(StridedMatrix<const double, MemorySpace> const &pts, Strided
 
 void LogDensityInputGradImpl(StridedMatrix<const double, MemorySpace> const &pts, StridedMatrix<double, MemorySpace> output) override {
     unsigned int N = pts.extent(1);
-    Kokkos::parallel_for( "uniform grad log density", N, KOKKOS_LAMBDA (const int& j) {
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy(0, N);
+    Kokkos::parallel_for( "uniform grad log density", policy, KOKKOS_CLASS_LAMBDA (const int& j) {
         output(0,j) = 0.;
         output(1,j) = 0.;
     });
diff --git a/tests/Distributions/Test_GaussianDistribution.cpp b/tests/Distributions/Test_GaussianDistribution.cpp
index e3baf804..a9bd5b75 100644
--- a/tests/Distributions/Test_GaussianDistribution.cpp
+++ b/tests/Distributions/Test_GaussianDistribution.cpp
@@ -3,6 +3,7 @@
 #include "MParT/Utilities/ArrayConversions.h"
 #include "MParT/Distributions/GaussianSamplerDensity.h"
 #include "Test_Distributions_Common.h"
+#include <iostream>
 
 using namespace mpart;
 using namespace Catch;
@@ -15,8 +16,10 @@ void TestGaussianLogPDF(StridedMatrix<double, Kokkos::HostSpace> samples, Stride
 
     double offset = -0.9189385332046728*dim; // -log(2*pi)*dim/2
     offset -= 0.5*logdet_cov;
+
+    Kokkos::RangePolicy<typename MemoryToExecution<Kokkos::HostSpace>::Space> policy {0, N_samp};
     // Calculate difference of samples_pdf and the true pdf in place
-    Kokkos::parallel_for(N_samp, KOKKOS_LAMBDA(const int i) {
+    Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i) {
         double norm = 0.;
         for(int j = 0; j < dim; j++) {
             norm += samples(j, i)*samples(j, i);
@@ -66,7 +69,8 @@ TEST_CASE( "Testing Gaussian Distribution", "[GaussianDist]") {
         dist->SampleImpl(samples);
         dist->LogDensityImpl(samples, samples_pdf);
         dist->LogDensityInputGradImpl(samples, samples_gradpdf);
-        Kokkos::parallel_for(dim, KOKKOS_LAMBDA(const int i) {
+        Kokkos::RangePolicy<typename MemoryToExecution<Kokkos::HostSpace>::Space> policy {0, dim};
+        Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i) {
             for(int j = 0; j < N_samp; j++) {
                 samples(i, j) -= 1.0;
             }
@@ -76,7 +80,7 @@ TEST_CASE( "Testing Gaussian Distribution", "[GaussianDist]") {
     }
 
     Kokkos::View<double**, Kokkos::LayoutLeft, Kokkos::HostSpace> covar("covar", dim, dim);
-    auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0}, {dim, dim});
+    Kokkos::MDRangePolicy<typename MemoryToExecution<Kokkos::HostSpace>::Space, Kokkos::Rank<2>> policy ({0, 0}, {dim, dim});
     Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i, const int j) {
         covar(i, j) = ((double) (i == j))*covar_diag_val;
     });
@@ -90,7 +94,7 @@ TEST_CASE( "Testing Gaussian Distribution", "[GaussianDist]") {
         dist->SampleImpl(samples);
         dist->LogDensityImpl(samples, samples_pdf);
         dist->LogDensityInputGradImpl(samples, samples_gradpdf);
-        policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0}, {dim, N_samp});
+        policy = Kokkos::MDRangePolicy<typename MemoryToExecution<Kokkos::HostSpace>::Space, Kokkos::Rank<2>>({0, 0}, {dim, N_samp});
         Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i, const int j) {
             samples(i, j) /= std::sqrt(covar_diag_val);
         });
@@ -107,7 +111,7 @@ TEST_CASE( "Testing Gaussian Distribution", "[GaussianDist]") {
         dist->SampleImpl(samples);
         dist->LogDensityImpl(samples, samples_pdf);
         dist->LogDensityInputGradImpl(samples, samples_gradpdf);
-        policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0}, {dim, N_samp});
+        policy = Kokkos::MDRangePolicy<typename MemoryToExecution<Kokkos::HostSpace>::Space, Kokkos::Rank<2>>({0, 0}, {dim, N_samp});
         Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i, const int j) {
             samples(i, j) -= 1.0;
             samples(i, j) /= std::sqrt(covar_diag_val);
diff --git a/tests/Distributions/Test_TransportDensity.cpp b/tests/Distributions/Test_TransportDensity.cpp
index 4a85abb5..5091f34f 100644
--- a/tests/Distributions/Test_TransportDensity.cpp
+++ b/tests/Distributions/Test_TransportDensity.cpp
@@ -119,7 +119,9 @@ TEST_CASE( "Testing Pullback/Pushforward density", "[PullbackPushforwardDensity]
         for(int i = 0; i < map->numCoeffs; i++) {
             map->Coeffs()(i) += fdstep;
             auto logDensityPerturb = pullback.LogDensity(pullbackSamples);
-            Kokkos::parallel_for("Test LogDensityCoeffGrad", N_samp, KOKKOS_LAMBDA(const int j) {
+	    Kokkos::RangePolicy<typename MemoryToExecution<Kokkos::HostSpace>::Space> policy(0,N_samp);
+
+            Kokkos::parallel_for("Test LogDensityCoeffGrad", policy, KOKKOS_LAMBDA(const int j) {
                 logDensityPerturb(j) -= samplesDensity(j);
                 logDensityPerturb(j) /= fdstep;
                 logDensityPerturb(j) -= coeffGrad(i, j);
@@ -131,4 +133,4 @@ TEST_CASE( "Testing Pullback/Pushforward density", "[PullbackPushforwardDensity]
             map->Coeffs()(i) = (double) (i + 1);
         }
     }
-}
\ No newline at end of file
+}
diff --git a/tests/Distributions/Test_TransportSampler.cpp b/tests/Distributions/Test_TransportSampler.cpp
index 38ddf498..d1a48996 100644
--- a/tests/Distributions/Test_TransportSampler.cpp
+++ b/tests/Distributions/Test_TransportSampler.cpp
@@ -11,6 +11,8 @@
 using namespace mpart;
 using namespace Catch;
 
+using MemorySpace = Kokkos::HostSpace;
+
 TEST_CASE( "Testing Pullback/Pushforward sampling", "[PullbackPushforwardSampler]") {
     unsigned int dim = 2;
     unsigned int N_samp = 10000;
@@ -41,7 +43,7 @@ TEST_CASE( "Testing Pullback/Pushforward sampling", "[PullbackPushforwardSampler
         auto pullbackSamples = pullback.Sample(N_samp);
 
         // Calculate the pullback and pushforward density error
-        auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0}, {N_samp, dim});
+        auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>, typename MemoryToExecution<MemorySpace>::Space>({0, 0}, {N_samp, dim});
         Kokkos::parallel_for("Normalize Pullback Samples", policy, KOKKOS_LAMBDA(const int j, const int i) {
             pullbackSamples(i,j) *= diag_el;
             pullbackSamples(i,j) += mean;
@@ -58,7 +60,7 @@ TEST_CASE( "Testing Pullback/Pushforward sampling", "[PullbackPushforwardSampler
         auto pushforwardSamples = pushforward.Sample(N_samp);
 
         // Calculate the pullback and pushforward density error
-        auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0}, {N_samp, dim});
+        auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>, typename MemoryToExecution<MemorySpace>::Space>({0, 0}, {N_samp, dim});
         Kokkos::parallel_for("Normalize Samples", policy, KOKKOS_LAMBDA(const int j, const int i) {
             pushforwardSamples(i,j) -= mean;
             pushforwardSamples(i,j) /= diag_el;
diff --git a/tests/MultiIndices/Test_MultiIndexSet.cpp b/tests/MultiIndices/Test_MultiIndexSet.cpp
index e393ad2d..7f7ffd9c 100644
--- a/tests/MultiIndices/Test_MultiIndexSet.cpp
+++ b/tests/MultiIndices/Test_MultiIndexSet.cpp
@@ -3,6 +3,7 @@
 
 #include "MParT/MultiIndices/FixedMultiIndexSet.h"
 #include "MParT/MultiIndices/MultiIndexSet.h"
+#include "MParT/Utilities/GPUtils.h"
 
 using namespace mpart;
 
@@ -24,10 +25,12 @@ TEST_CASE( "Testing the FixedMultiIndexSet class", "[FixedMultiIndexSet]" ) {
     FixedMultiIndexSet<Kokkos::HostSpace> multiSet_fixed = multiSet_original.Fix(true);
     MultiIndexSet multiSet_reconstructed = multiSet_fixed.Unfix();
     REQUIRE(multiSet_original.Size() == multiSet_reconstructed.Size());
-    REQUIRE(multiSet_original.MaxOrders() == multiSet_reconstructed.MaxOrders());
+    bool same_max_orders = multiSet_original.MaxOrders() == multiSet_reconstructed.MaxOrders();
+    REQUIRE(same_max_orders);
     std::vector<unsigned int> diagonal_idxs_ref = multiSet_reconstructed.NonzeroDiagonalEntries();
     std::vector<unsigned int> diagonal_idxs = multiSet_fixed.NonzeroDiagonalEntries();
-    REQUIRE(diagonal_idxs_ref == diagonal_idxs);
+    bool diag_idxs_pass = diagonal_idxs_ref == diagonal_idxs;
+    REQUIRE(diag_idxs_pass);
 }
 
 TEST_CASE( "Testing dimension sorting in the FixedMultiIndexSet class", "[FixedMultiIndexSetSorting]" ) {
@@ -50,7 +53,7 @@ TEST_CASE( "Testing dimension sorting in the FixedMultiIndexSet class", "[FixedM
     nzDims(3) = 1; nzOrders(3)=2; // The 2 in [1,2]
 
     FixedMultiIndexSet<Kokkos::HostSpace> mset(dim, nzStarts, nzDims, nzOrders);
-
+    
     CHECK(mset.nzDims(3)>mset.nzDims(2));
     CHECK(mset.nzOrders(3)==2);
     CHECK(mset.nzOrders(2)==1);
@@ -61,12 +64,12 @@ TEST_CASE( "Testing dimension sorting in the FixedMultiIndexSet class", "[FixedM
     nzDims(1) = 1; nzOrders(1)=1; // [0,1]
     nzDims(2) = 1; nzOrders(2)=2; // The 2 in [1,2]
     nzDims(3) = 0; nzOrders(3)=1; // The 1 in [1,2].  Internal to the FixedMultiIndexSet, this should come before 2.
-    
+
     FixedMultiIndexSet<Kokkos::HostSpace> mset2(dim, nzStarts, nzDims, nzOrders);
 
-    CHECK(mset.nzDims(3)>mset.nzDims(2));
-    CHECK(mset.nzOrders(3)==2);
-    CHECK(mset.nzOrders(2)==1);
+    CHECK(mset2.nzDims(3)>mset2.nzDims(2));
+    CHECK(mset2.nzOrders(3)==2);
+    CHECK(mset2.nzOrders(2)==1);
     
 }
 
@@ -111,7 +114,7 @@ TEST_CASE( "Testing the FixedMultiIndexSet class with anisotropic degrees", "[An
 }
 
 
-#if defined(KOKKOS_ENABLE_CUDA ) || defined(KOKKOS_ENABLE_SYCL)
+#if defined( MPART_ENABLE_GPU)
 
 TEST_CASE( "Testing the FixedMultiIndexSet class copy to device", "[FixedMultiIndexSet]" ) {
 
@@ -120,7 +123,7 @@ TEST_CASE( "Testing the FixedMultiIndexSet class copy to device", "[FixedMultiIn
 
     FixedMultiIndexSet<Kokkos::HostSpace> mset(dim,maxOrder);
 
-    FixedMultiIndexSet<Kokkos::DefaultExecutionSpace::memory_space> deviceSet = mset.ToDevice<Kokkos::DefaultExecutionSpace::memory_space>();
+    FixedMultiIndexSet<DeviceSpace> deviceSet = mset.ToDevice<DeviceSpace>();
 
 }
 #endif
@@ -506,8 +509,10 @@ TEST_CASE("Testing the MultiIndexSet class", "[MultiIndexSet]" ) {
         std::sort(inds.begin(), inds.end());
         std::sort(inds_fixed.begin(), inds_fixed.end());
         std::sort(inds_fixed2.begin(), inds_fixed2.end());
-        REQUIRE( inds == inds_fixed );
-        REQUIRE( inds == inds_fixed2 );
+        bool inds_same_fixed = inds == inds_fixed;
+        REQUIRE( inds_same_fixed );
+        bool inds_same_fixed2 = inds == inds_fixed2;
+        REQUIRE( inds_same_fixed2 );
         MultiIndexSet full_set = MultiIndexSet::CreateTotalOrder(dim, maxOrder, MultiIndexLimiter::NonzeroDiag());
         inds = full_set.NonzeroDiagonalEntries();
         REQUIRE( inds.size() == full_set.Size() );
diff --git a/tests/Test_BasisEvaluator.cpp b/tests/Test_BasisEvaluator.cpp
index 77226fea..ac7daed1 100644
--- a/tests/Test_BasisEvaluator.cpp
+++ b/tests/Test_BasisEvaluator.cpp
@@ -5,13 +5,13 @@ using namespace mpart;
 using namespace Catch;
 
 struct TestEvaluators {
-    virtual void EvaluateAll(double* output, int max_order, double input) const {
+    KOKKOS_INLINE_FUNCTION virtual void EvaluateAll(double* output, int max_order, double input) const {
         assert(false);
     };
-    virtual void EvaluateDerivatives(double* output, double* output_diff, int max_order, double input) const {
+    KOKKOS_INLINE_FUNCTION virtual void EvaluateDerivatives(double* output, double* output_diff, int max_order, double input) const {
         assert(false);
     };
-    virtual void EvaluateSecondDerivatives(double* output, double* output_diff, double* output_diff2, int max_order, double input) const {
+    KOKKOS_INLINE_FUNCTION virtual void EvaluateSecondDerivatives(double* output, double* output_diff, double* output_diff2, int max_order, double input) const {
         assert(false);
     };
     virtual ~TestEvaluators() = default;
@@ -145,4 +145,4 @@ TEST_CASE( "Testing basis evaluators", "[BasisEvaluators]") {
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/tests/Test_InnerMarginalAffineMap.cpp b/tests/Test_InnerMarginalAffineMap.cpp
index 3dfebf2f..a41b504f 100644
--- a/tests/Test_InnerMarginalAffineMap.cpp
+++ b/tests/Test_InnerMarginalAffineMap.cpp
@@ -225,7 +225,6 @@ TEST_CASE( "Test InnerMarginalAffineMap", "[InnerMarginalAffineMap]") {
     SECTION("MoveCoeffs") {
         unsigned int maxOrder = 3;
         unsigned int outputDim = 2;
-        unsigned int numPts = 20;
         auto trimap = MapFactory::CreateTriangular<MemorySpace>(inputDim, outputDim, maxOrder, MapOptions());
 
         // Penalize high order k
diff --git a/tests/Test_LinearAlgebra.cpp b/tests/Test_LinearAlgebra.cpp
index d51ded4f..effbd00d 100644
--- a/tests/Test_LinearAlgebra.cpp
+++ b/tests/Test_LinearAlgebra.cpp
@@ -150,7 +150,7 @@ TEST_CASE( "Testing LU Factorization", "LinearAlgebra_LU" ) {
     B(1,0) = 2.; B(1,1) = 5.;
     B(2,0) = 3.; B(2,1) = 6.;
 
-    auto constA = ToConstKokkos(A.data(), 3, 3);
+    Kokkos::View<const double**, Kokkos::LayoutLeft, Kokkos::HostSpace> constA = A;
     auto eigA = ConstKokkosToMat(constA);
     auto eigB = KokkosToMat(B);
     unsigned int nrows = eigA.rows();
@@ -197,7 +197,7 @@ TEST_CASE( "Testing Cholesky Factorization", "LinearAlgebra_Cholesky" ) {
     B(1,0) = 2.; B(1,1) = 5.;
     B(2,0) = 3.; B(2,1) = 6.;
 
-    auto constA = ToConstKokkos(A.data(), 3, 3);
+    Kokkos::View<const double**, Kokkos::LayoutLeft, Kokkos::HostSpace> constA = A;
     auto eigA = ConstKokkosToMat(constA);
     auto eigB = KokkosToMat(B);
     unsigned int nrows = eigA.rows();
@@ -364,7 +364,7 @@ TEST_CASE( "Testing LU Factorization on Device", "LinearAlgebra_LUDevice" ) {
     B(1,0) = 2.; B(1,1) = 5.;
     B(2,0) = 3.; B(2,1) = 6.;
 
-    auto constA = ToConstKokkos(A.data(), 3, 3);
+    Kokkos::View<const double**, Kokkos::LayoutLeft, Kokkos::HostSpace> constA = A;
     auto constA_d = ToDevice<mpart::DeviceSpace>(constA);
     auto B_d = ToDevice<mpart::DeviceSpace>(B);
     auto eigA = ConstKokkosToMat(constA);
@@ -390,7 +390,7 @@ TEST_CASE( "Testing LU Factorization on Device", "LinearAlgebra_LUDevice" ) {
     }
     SECTION("Solve LU out of place") {
         PartialPivLU<mpart::DeviceSpace> Alu_d(constA_d);
-        auto C_d = Alu.solve(B_d);
+        auto C_d = Alu_d.solve(B_d);
         auto C_h = ToHost(C_d);
         for(unsigned int j=0; j<C_h.extent(1); ++j){
             for(unsigned int i=0; i<C_h.extent(0); ++i){
@@ -399,12 +399,12 @@ TEST_CASE( "Testing LU Factorization on Device", "LinearAlgebra_LUDevice" ) {
         }
     }
     SECTION("Compute Determinant") {
-        PartialPivLU<Kokkos::HostSpace> Alu_d(constA_d);
+        PartialPivLU<mpart::DeviceSpace> Alu_d(constA_d);
         CHECK(Alu_d.determinant() == Approx(eigA.determinant()).epsilon(1e-14).margin(1e-14));
     }
 }
 
-TEST_CASE( "Testing Cholesky Factorization", "LinearAlgebra_Cholesky" ) {
+TEST_CASE( "Testing Cholesky Factorization on Device", "LinearAlgebra_Cholesky_Deivice" ) {
     Kokkos::View<double**, Kokkos::LayoutLeft, Kokkos::HostSpace> A("A", 3, 3);
     Kokkos::View<double**, Kokkos::LayoutLeft, Kokkos::HostSpace> B_h("B", 3, 2);
     A(0,0) = 36.; A(0,1) = 18.; A(0,2) = 6.;
@@ -415,21 +415,21 @@ TEST_CASE( "Testing Cholesky Factorization", "LinearAlgebra_Cholesky" ) {
     B_h(1,0) = 2.; B_h(1,1) = 5.;
     B_h(2,0) = 3.; B_h(2,1) = 6.;
 
-    auto constA_h = ToConstKokkos(A.data(), 3, 3);
+    Kokkos::View<const double**, Kokkos::LayoutLeft, Kokkos::HostSpace> constA_h = A;
     auto constA_d = ToDevice<mpart::DeviceSpace>(constA_h);
-    auto B_d = ToDevice<mpart::DeviceSpace>(B);
+    auto B_d = ToDevice<mpart::DeviceSpace>(B_h);
     auto eigA = ConstKokkosToMat(constA_h);
-    auto eigB = KokkosToMat(B);
+    auto eigB = KokkosToMat(B_h);
     Eigen::MatrixXd eigX = eigA.llt().solve(eigB);
     Eigen::MatrixXd eigY = eigA.llt().matrixL().solve(eigB);
 
-    SECTION("Compute Cholesky") {
-        Cholesky<Kokkos::HostSpace> Achol_d(constA_d);
+    SECTION("Compute Cholesky on Device") {
+        Cholesky<mpart::DeviceSpace> Achol_d(constA_d);
     }
-    SECTION("Solve Cholesky inplace") {
+    SECTION("Solve Cholesky inplace on Device") {
         Kokkos::View<double**, Kokkos::LayoutLeft, mpart::DeviceSpace> C_d("C", 3, 2);
         Kokkos::deep_copy(C_d, B_d);
-        Cholesky<Kokkos::HostSpace> Achol_d(constA_d);
+        Cholesky<mpart::DeviceSpace> Achol_d(constA_d);
         Achol_d.solveInPlace(C_d);
         auto C_h = ToHost(C_d);
         for(unsigned int j=0; j<C_h.extent(1); ++j){
@@ -438,10 +438,10 @@ TEST_CASE( "Testing Cholesky Factorization", "LinearAlgebra_Cholesky" ) {
             }
         }
     }
-    SECTION("Solve Cholesky L inplace") {
+    SECTION("Solve Cholesky L inplace on Device") {
         Kokkos::View<double**, Kokkos::LayoutLeft, mpart::DeviceSpace> C_d("C", 3, 2);
         Kokkos::deep_copy(C_d, B_d);
-        Cholesky<Kokkos::HostSpace> Achol_d(constA_d);
+        Cholesky<mpart::DeviceSpace> Achol_d(constA_d);
         Achol_d.solveInPlaceL(C_d);
         auto C_h = ToHost(C_d);
         for(unsigned int j=0; j<C_h.extent(1); ++j){
@@ -450,9 +450,9 @@ TEST_CASE( "Testing Cholesky Factorization", "LinearAlgebra_Cholesky" ) {
             }
         }
     }
-    SECTION("Solve Cholesky out of place") {
-        Cholesky<Kokkos::HostSpace> Achol_d(constA_d);
-        auto C_d = Achol.solve(B_d);
+    SECTION("Solve Cholesky out of place on Device") {
+        Cholesky<mpart::DeviceSpace> Achol_d(constA_d);
+        auto C_d = Achol_d.solve(B_d);
         auto C_h = ToHost(C_d);
         for(unsigned int j=0; j<C_h.extent(1); ++j){
             for(unsigned int i=0; i<C_h.extent(0); ++i){
@@ -460,9 +460,9 @@ TEST_CASE( "Testing Cholesky Factorization", "LinearAlgebra_Cholesky" ) {
             }
         }
     }
-    SECTION("Compute Determinant") {
-        Cholesky<Kokkos::HostSpace> Achol_d(constA_d);
+    SECTION("Compute Determinant on Device") {
+        Cholesky<mpart::DeviceSpace> Achol_d(constA_d);
         CHECK(Achol_d.determinant() == Approx(eigA.determinant()).epsilon(1e-14).margin(1e-14));
     }
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/tests/Test_MapFactory.cpp b/tests/Test_MapFactory.cpp
index 02bd7bfc..33a16387 100644
--- a/tests/Test_MapFactory.cpp
+++ b/tests/Test_MapFactory.cpp
@@ -5,6 +5,7 @@
 #include "MParT/TriangularMap.h"
 #include "MParT/MultiIndices/MultiIndexSet.h"
 #include "MParT/MultiIndices/FixedMultiIndexSet.h"
+#include "MParT/Utilities/KokkosSpaceMappings.h"
 
 #include <unordered_map>
 #include <string>
@@ -225,7 +226,7 @@ TEST_CASE( "Testing factory method for Sigmoid Component", "[MapFactorySigmoidCo
     REQUIRE(func != nullptr);
     unsigned int numPts = 100;
     Kokkos::View<double**,MemorySpace> pts("Points", inputDim, numPts);
-    Kokkos::MDRangePolicy<Kokkos::Rank<2>> policy({0,0}, {inputDim, numPts});
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>, typename MemoryToExecution<MemorySpace>::Space> policy({0,0}, {inputDim, numPts});
     Kokkos::parallel_for("Fill points", policy, KOKKOS_LAMBDA(const int i, const int j){
         pts(i,j) = double(j*i)/double((inputDim)*(numPts-1));
     });
@@ -237,7 +238,8 @@ TEST_CASE( "Testing factory method for Sigmoid Component", "[MapFactorySigmoidCo
     CHECK(eval.extent(0)==1);
     SECTION("Check Gradient") {
         Kokkos::View<double**, MemorySpace> sens ( "Sensitivities", 1, numPts);
-        Kokkos::parallel_for("fill sensitivities", numPts, KOKKOS_LAMBDA(const int i){
+        Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy_pts {0, numPts};
+        Kokkos::parallel_for("fill sensitivities", policy_pts, KOKKOS_LAMBDA(const int i){
             sens(0,i) = 1.0;
         });
         Kokkos::View<double**, MemorySpace> grad = func->Gradient(pts, sens);
@@ -245,7 +247,7 @@ TEST_CASE( "Testing factory method for Sigmoid Component", "[MapFactorySigmoidCo
         CHECK(grad.extent(1)==numPts);
         double fd_step = 1e-6;
         for(int i = 0; i < inputDim; i++){
-            Kokkos::parallel_for(numPts, KOKKOS_LAMBDA(const int j){
+            Kokkos::parallel_for(policy_pts, KOKKOS_LAMBDA(const int j){
                 if(i > 0) pts(i-1,j) -= fd_step;
                 pts(i,j) += fd_step;
             });
diff --git a/tests/Test_MapObjective.cpp b/tests/Test_MapObjective.cpp
index e878c5d8..a89f61ac 100644
--- a/tests/Test_MapObjective.cpp
+++ b/tests/Test_MapObjective.cpp
@@ -46,9 +46,7 @@ TEST_CASE( "Test KLMapObjective", "[KLMapObjective]") {
     const double coeff_def = 1.;
 
     auto map = MapFactory::CreateTriangular<Kokkos::HostSpace>(dim, dim, 2);
-    Kokkos::parallel_for("Fill coeffs", map->numCoeffs, KOKKOS_LAMBDA(const unsigned int i){
-        map->Coeffs()(i) = coeff_def;
-    });
+    Kokkos::deep_copy(map->Coeffs(), coeff_def);
 
     SECTION("CoeffGradImpl"){
         double fd_step = 1e-6;
diff --git a/tests/Test_MonotoneComponent.cpp b/tests/Test_MonotoneComponent.cpp
index 4cd31f77..87700998 100644
--- a/tests/Test_MonotoneComponent.cpp
+++ b/tests/Test_MonotoneComponent.cpp
@@ -263,12 +263,11 @@ TEST_CASE( "MonotoneIntegrand2d", "[MonotoneIntegrand2d]") {
     }
 
     SECTION("Integrand Input Gradient") {
-        double fdStep = 1e-5;
 
         MonotoneIntegrand<MultivariateExpansionWorker<BasisEvaluator<BasisHomogeneity::Homogeneous,ProbabilistHermite>>, Exp, Kokkos::View<double*,HostSpace>,Kokkos::View<double*,HostSpace>> integrand(&cache[0], expansion, pt, coeffs, DerivativeFlags::Input, 0.0);
 
         // Evaluate the expansion
-        double df, d2f;
+        double df;
         Kokkos::View<double*,HostSpace> fval("Integrand", dim+1);
         Kokkos::View<double*,HostSpace> coeffGrad("Coefficient Gradient", dim);
 
@@ -872,7 +871,8 @@ TEST_CASE("Testing MonotoneComponent CoeffGrad and LogDeterminantCoeffGrad", "[M
     SECTION("DiagonalCoeffIndices") {
         std::vector<unsigned int> indices = comp.DiagonalCoeffIndices();
         std::vector<unsigned int> indices_ref = expansion.NonzeroDiagonalEntries();
-        REQUIRE(indices == indices_ref);
+        bool same_indices = indices == indices_ref;
+        REQUIRE(same_indices);
     }
 }
 
@@ -911,7 +911,8 @@ TEST_CASE( "MonotoneIntegrand1d on device", "[MonotoneIntegrandDevice]") {
     SECTION("Integrand Only") {
         Kokkos::View<double*, DeviceSpace> dres("result", 1);
 
-        Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int i){
+    	Kokkos::RangePolicy<typename MemoryToExecution<DeviceSpace>::Space> policy(0,1);
+        Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){
             MonotoneIntegrand<decltype(expansion), Exp, decltype(dpt), decltype(dcoeffs), DeviceSpace> integrand(dcache.data(), expansion, dpt, dcoeffs, DerivativeFlags::None, 0.0);
             integrand(0.5, &dres(0));
         });
@@ -924,7 +925,8 @@ TEST_CASE( "MonotoneIntegrand1d on device", "[MonotoneIntegrandDevice]") {
     SECTION("Integrand Derivative") {
         Kokkos::View<double*, DeviceSpace> dres("result", 2);
 
-        Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int i){
+    	Kokkos::RangePolicy<typename MemoryToExecution<DeviceSpace>::Space> policy(0,1);
+        Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){
             MonotoneIntegrand<decltype(expansion), Exp, decltype(dpt), decltype(dcoeffs), DeviceSpace> integrand(dcache.data(), expansion, dpt, dcoeffs, DerivativeFlags::Diagonal, 0.0);
             integrand(0.5, dres.data());
         });
@@ -940,7 +942,8 @@ TEST_CASE( "MonotoneIntegrand1d on device", "[MonotoneIntegrandDevice]") {
         Kokkos::View<double*, DeviceSpace> dres_fd("result_fd", hset.Size());
         Kokkos::View<double*, DeviceSpace> testVal("integrand", 1+hset.Size());
 
-        Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int i){
+    	Kokkos::RangePolicy<typename MemoryToExecution<DeviceSpace>::Space> policy(0,1);
+        Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){
 
             MonotoneIntegrand<decltype(expansion), Exp, decltype(dpt), decltype(dcoeffs), DeviceSpace> integrand(dcache.data(), expansion, dpt, dcoeffs, DerivativeFlags::Parameters, 0.0);
             MonotoneIntegrand<decltype(expansion), Exp, decltype(dpt), decltype(dcoeffs), DeviceSpace> integrand2(dcache.data(), expansion, dpt, dcoeffs, DerivativeFlags::None, 0.0);
@@ -974,8 +977,9 @@ TEST_CASE( "MonotoneIntegrand1d on device", "[MonotoneIntegrandDevice]") {
         Kokkos::View<double*, DeviceSpace> dres_fd("result_fd", hset.Size());
         Kokkos::View<double*, DeviceSpace> testVal("integrand", 1+hset.Size());
         Kokkos::View<double*, DeviceSpace> workspace("workspace", hset.Size());
-
-        Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int i){
+	
+	Kokkos::RangePolicy<typename MemoryToExecution<DeviceSpace>::Space> policy(0,1);
+        Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){
 
             MonotoneIntegrand<decltype(expansion), Exp, decltype(dpt), decltype(dcoeffs), DeviceSpace> integrand(dcache.data(), expansion, dpt, dcoeffs, DerivativeFlags::MixedCoeff, 0.0, workspace);
             MonotoneIntegrand<decltype(expansion), Exp, decltype(dpt), decltype(dcoeffs), DeviceSpace> integrand2(dcache.data(), expansion, dpt, dcoeffs, DerivativeFlags::Diagonal, 0.0);
@@ -1042,7 +1046,8 @@ TEST_CASE( "Testing MonotoneComponent::EvaluateSingle on Device", "[MonotoneComp
 
     Kokkos::View<double*, DeviceSpace> dres("Device Evaluation", 1);
     // Run the fill cache funciton, using a parallel_for loop to ensure it's run on the device
-    Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int i){
+    Kokkos::RangePolicy<typename MemoryToExecution<DeviceSpace>::Space> policy(0,1);
+    Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){
         dexpansion.FillCache1(dcache.data(), dpt, DerivativeFlags::None);
         dres(0) = MonotoneComponent<decltype(dexpansion),Exp, decltype(quad), DeviceSpace>::EvaluateSingle(dcache.data(), workspace.data(), dpt, dpt(dim-1), dcoeffs, quad, dexpansion);
     });
@@ -1145,4 +1150,4 @@ TEST_CASE( "Testing 1d monotone component evaluation on device", "[MonotoneCompo
 
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/tests/Test_MultivariateExpansion.cpp b/tests/Test_MultivariateExpansion.cpp
index b4856493..04ed1da9 100644
--- a/tests/Test_MultivariateExpansion.cpp
+++ b/tests/Test_MultivariateExpansion.cpp
@@ -24,7 +24,8 @@ TEST_CASE( "Testing multivariate expansion", "[MultivariateExpansion]") {
     CHECK(func.numCoeffs == (mset.Size()*outDim));
     std::vector<unsigned int> nonzeroDiagTerms_ref = mset.NonzeroDiagonalEntries();
     std::vector<unsigned int> diagCoeffs = func.DiagonalCoeffIndices();
-    REQUIRE(nonzeroDiagTerms_ref == diagCoeffs);
+    
+    REQUIRE((nonzeroDiagTerms_ref == diagCoeffs));
 
     Kokkos::View<double*,Kokkos::HostSpace> coeffs("coefficients", func.numCoeffs);
     for(unsigned int i=0; i<func.numCoeffs; ++i)
diff --git a/tests/Test_MultivariateExpansionWorker.cpp b/tests/Test_MultivariateExpansionWorker.cpp
index aba4519c..18fb47d0 100644
--- a/tests/Test_MultivariateExpansionWorker.cpp
+++ b/tests/Test_MultivariateExpansionWorker.cpp
@@ -251,7 +251,8 @@ TEST_CASE( "Testing multivariate expansion on device", "[MultivariateExpansionWo
     hexpansion.FillCache1(hcache.data(), hpt, DerivativeFlags::None);
 
     // Run the fill cache funciton, using a parallel_for loop to ensure it's run on the device
-    Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int i){
+    Kokkos::RangePolicy<typename MemoryToExecution<DeviceSpace>::Space> policy(0,1);
+    Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){
         dexpansion.FillCache1(dcache.data(), dpt, DerivativeFlags::None);
         dexpansion.FillCache2(dcache.data(), dpt, 0.5 * dpt(dim-1), DerivativeFlags::None);
     });
@@ -267,4 +268,4 @@ TEST_CASE( "Testing multivariate expansion on device", "[MultivariateExpansionWo
     }
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/tests/Test_OrthogonalPolynomials.cpp b/tests/Test_OrthogonalPolynomials.cpp
index 10a2d32b..2e8d7e73 100644
--- a/tests/Test_OrthogonalPolynomials.cpp
+++ b/tests/Test_OrthogonalPolynomials.cpp
@@ -251,9 +251,9 @@ TEST_CASE( "Device Hermite polynomial evaluation", "[PhysicistHermiteDevice]" )
     auto xs_device = ToDevice<Kokkos::DefaultExecutionSpace::memory_space>(xs_host);
 
     Kokkos::View<double*,Kokkos::DefaultExecutionSpace::memory_space> ys_device("evals", xs.size());
-
+    Kokkos::RangePolicy<typename Kokkos::DefaultExecutionSpace::execution_space> policy(0,xs.size());
     for(unsigned int p=0; p<10; ++p){
-        Kokkos::parallel_for(xs.size(), KOKKOS_LAMBDA(const size_t ind){
+        Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const size_t ind){
             ys_device(ind) = poly.Evaluate(p, xs_device(ind));
         });
         
@@ -267,4 +267,4 @@ TEST_CASE( "Device Hermite polynomial evaluation", "[PhysicistHermiteDevice]" )
 }
 
 
-#endif 
\ No newline at end of file
+#endif 
diff --git a/tests/Test_PositiveBijectors.cpp b/tests/Test_PositiveBijectors.cpp
index 10f1b372..ed535398 100644
--- a/tests/Test_PositiveBijectors.cpp
+++ b/tests/Test_PositiveBijectors.cpp
@@ -4,6 +4,7 @@
 
 #include "MParT/PositiveBijectors.h"
 #include "MParT/Utilities/ArrayConversions.h"
+#include "MParT/Utilities/KokkosSpaceMappings.h"
 
 using namespace mpart;
 using namespace Catch;
@@ -50,10 +51,11 @@ TEST_CASE( "Testing soft plus function on device.", "[SofPlusDevice]" ) {
 
     auto xs_device = ToDevice<Kokkos::DefaultExecutionSpace::memory_space>(xs_host);
 
-    Kokkos::View<double*,Kokkos::DefaultExecutionSpace::memory_space> ys_device("ys_device", xs_host.extent(0));
-    Kokkos::View<double*,Kokkos::DefaultExecutionSpace::memory_space> deriv_device("deriv_device", xs_host.extent(0));
-    
-    Kokkos::parallel_for(xs_host.size(), KOKKOS_LAMBDA(const size_t ind){
+    unsigned int N_p = xs_host.extent(0);
+    Kokkos::View<double*,DeviceSpace> ys_device("ys_device", N_p);
+    Kokkos::View<double*,DeviceSpace> deriv_device("deriv_device", N_p);
+    Kokkos::RangePolicy<typename MemoryToExecution<DeviceSpace>::Space> policy {0u, N_p};
+    Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const size_t ind){
         ys_device(ind) = SoftPlus::Evaluate(xs_device(ind));
         deriv_device(ind) = SoftPlus::Derivative(xs_device(ind));
     });
diff --git a/tests/Test_Quadrature.cpp b/tests/Test_Quadrature.cpp
index a27473c2..2f5d0265 100644
--- a/tests/Test_Quadrature.cpp
+++ b/tests/Test_Quadrature.cpp
@@ -333,7 +333,6 @@ TEST_CASE( "Testing CC Quadrature on device", "[ClenshawCurtisDevice]" ) {
     unsigned int numRepeats = 320;
 
     // Set tolerance for tests
-    double testTol = 1e-8;
     double lb = 0;
     double ub = 1.0;
 
@@ -383,7 +382,6 @@ TEST_CASE( "Testing Adaptive Simpson Quadrature on device", "[AdaptiveSimpsonDev
     double absTol = 1e-6;
     
     // Set tolerance for tests
-    double testTol = 1e-4;
     double lb = 0;
     double ub = 1.0;
 
@@ -432,7 +430,6 @@ TEST_CASE( "Testing Adaptive Clenshaw Curtis on device", "[AdaptiveCCDevice]" )
     unsigned int order = 8;
 
     // Set tolerance for tests
-    double testTol = 1e-4;
     double lb = 0;
     double ub = 1.0;
 
diff --git a/tests/Test_RectifiedMultivariateExpansion.cpp b/tests/Test_RectifiedMultivariateExpansion.cpp
index ec1651d6..a045e008 100644
--- a/tests/Test_RectifiedMultivariateExpansion.cpp
+++ b/tests/Test_RectifiedMultivariateExpansion.cpp
@@ -288,7 +288,6 @@ TEMPLATE_TEST_CASE("Multiple Sigmoid RectifiedMultivariateExpansion","[multi_sig
 
     // Set up sigmoids
     const int num_sigmoids = 3;
-    const int order = num_sigmoids+1+2;
     const int param_length = 2 + num_sigmoids*(num_sigmoids+1)/2;
     Kokkos::View<double*, MemorySpace> center("Sigmoid Center", param_length);
     Kokkos::View<double*, MemorySpace> width("Sigmoid Width", param_length);
diff --git a/tests/Test_RootFinding.cpp b/tests/Test_RootFinding.cpp
index e9a68cb1..c811da6d 100644
--- a/tests/Test_RootFinding.cpp
+++ b/tests/Test_RootFinding.cpp
@@ -70,7 +70,7 @@ TEST_CASE( "RootFindingUtils", "[RootFindingUtils]") {
         CHECK(info==0);
     }
     SECTION("Test Inverse Flat") {
-        double xd = 0.5, yd = 0.0;
+        double yd = 0.0;
         double x0 = 0.0, xtol = 1e-5, ftol = 1e-5;
         auto f = [](double x){return -1.0;};
         int info = 0;
diff --git a/tests/Test_Serialization.cpp b/tests/Test_Serialization.cpp
index d3ad618b..26d6c5a6 100644
--- a/tests/Test_Serialization.cpp
+++ b/tests/Test_Serialization.cpp
@@ -289,7 +289,6 @@ TEST_CASE("Test serialization of multivariate expansion worker.", "[Serializatio
 
 TEST_CASE("Test serialization of monotone component.", "[Serialization]"){
     
-    double testTol = 1e-7;
     unsigned int maxDegree = 2;
     unsigned int dim = 1;
 
diff --git a/tests/Test_Sigmoid.cpp b/tests/Test_Sigmoid.cpp
index 80c88866..f0beee4e 100644
--- a/tests/Test_Sigmoid.cpp
+++ b/tests/Test_Sigmoid.cpp
@@ -1,89 +1,116 @@
 #include <catch2/catch_all.hpp>
 #include "MParT/Sigmoid.h"
+#include <iostream>
 
 using namespace mpart;
 using namespace Catch;
 using namespace Catch::Matchers;
-using MemorySpace = Kokkos::HostSpace;
 
 void CheckNearZero(double calc, double ref, double delta=1e-12, double tol=1e-12) {
     if(ref == 0.) REQUIRE_THAT(calc, WithinAbs(0., tol));
     else REQUIRE_THAT(calc, WithinRel(ref, delta));
 }
 
-template<typename Function>
+template<typename MemorySpace, typename Function>
 void TestSigmoidGradients(Function Sigmoid, unsigned int N_grad_points, double fd_delta) {
     Kokkos::View<double*, MemorySpace> gradPts("Gradient points", N_grad_points);
     Kokkos::View<double*, MemorySpace> gradPts_plus_delta("Gradient points plus delta", N_grad_points);
-    Kokkos::parallel_for(N_grad_points, KOKKOS_LAMBDA(unsigned int point_index) {
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy {0u, N_grad_points};
+    Kokkos::parallel_for(policy, KOKKOS_LAMBDA(unsigned int point_index) {
         double gradPt = 3.0*(-1.0 + 2*((double) point_index)/((double) N_grad_points-1));
         gradPts(point_index) = gradPt;
         gradPts_plus_delta(point_index) = gradPt + fd_delta;
     });
     int max_order = Sigmoid.GetOrder();
     // Create output array for each possible evaluation
-    double output[max_order+1];
-    double output_pos_fd[max_order+1];
-    double output_deriv[max_order+1];
-    double output_deriv_pos_fd[max_order+1];
-    double output_2deriv[max_order+1];
-    double output_diff[max_order+1];
-    double output_diff_pos_fd[max_order+1];
-    double output_diff_2deriv[max_order+1];
-    double output_diff2[max_order+1];
+    Kokkos::View<double*, MemorySpace> output_d ("Output", N_grad_points*(max_order+1));
+    Kokkos::View<double*, MemorySpace> output_pos_fd_d ("Output", N_grad_points*(max_order+1));
+    Kokkos::View<double*, MemorySpace> output_deriv_d ("Output", N_grad_points*(max_order+1));
+    Kokkos::View<double*, MemorySpace> output_deriv_pos_fd_d ("Output", N_grad_points*(max_order+1));
+    Kokkos::View<double*, MemorySpace> output_2deriv_d ("Output", N_grad_points*(max_order+1));
+    Kokkos::View<double*, MemorySpace> output_diff_d ("Output", N_grad_points*(max_order+1));
+    Kokkos::View<double*, MemorySpace> output_diff_pos_fd_d ("Output", N_grad_points*(max_order+1));
+    Kokkos::View<double*, MemorySpace> output_diff_2deriv_d ("Output", N_grad_points*(max_order+1));
+    Kokkos::View<double*, MemorySpace> output_diff2_d ("Output", N_grad_points*(max_order+1));
+    Kokkos::parallel_for(policy, KOKKOS_LAMBDA(unsigned int i) {
+        unsigned int offset = i*(max_order+1);
+        Sigmoid.EvaluateAll(output_d.data() + offset, max_order, gradPts(i));
+        Sigmoid.EvaluateAll(output_pos_fd_d.data() + offset, max_order, gradPts_plus_delta(i));
+        Sigmoid.EvaluateDerivatives(output_deriv_d.data() + offset, output_diff_d.data() + offset, max_order, gradPts(i));
+        Sigmoid.EvaluateDerivatives(output_deriv_pos_fd_d.data() + offset, output_diff_pos_fd_d.data() + offset, max_order, gradPts_plus_delta(i));
+        Sigmoid.EvaluateSecondDerivatives(output_2deriv_d.data() + offset, output_diff_2deriv_d.data() + offset, output_diff2_d.data() + offset, max_order, gradPts(i));
+    });
+    Kokkos::fence();
+    Kokkos::View<double*, Kokkos::HostSpace> output = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_d);
+    Kokkos::View<double*, Kokkos::HostSpace> output_pos_fd = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_pos_fd_d);
+    Kokkos::View<double*, Kokkos::HostSpace> output_deriv = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_deriv_d);
+    Kokkos::View<double*, Kokkos::HostSpace> output_deriv_pos_fd = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_deriv_pos_fd_d);
+    Kokkos::View<double*, Kokkos::HostSpace> output_2deriv = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_2deriv_d);
+    Kokkos::View<double*, Kokkos::HostSpace> output_diff = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_diff_d);
+    Kokkos::View<double*, Kokkos::HostSpace> output_diff_pos_fd = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_diff_pos_fd_d);
+    Kokkos::View<double*, Kokkos::HostSpace> output_diff_2deriv = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_diff_2deriv_d);
+    Kokkos::View<double*, Kokkos::HostSpace> output_diff2 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_diff2_d);
+
     for(int i = 0; i < N_grad_points; i++) {
-        Sigmoid.EvaluateAll(output, max_order, gradPts(i));
-        Sigmoid.EvaluateAll(output_pos_fd, max_order, gradPts_plus_delta(i));
-        Sigmoid.EvaluateDerivatives(output_deriv, output_diff, max_order, gradPts(i));
-        Sigmoid.EvaluateDerivatives(output_deriv_pos_fd, output_diff_pos_fd, max_order, gradPts_plus_delta(i));
-        Sigmoid.EvaluateSecondDerivatives(output_2deriv, output_diff_2deriv, output_diff2, max_order, gradPts(i));
         for(int j = 0; j <= max_order; j++) {
-            double fd_diff = (output_pos_fd[j]-output[j])/fd_delta;
-            double fd_diff2 = (output_diff_pos_fd[j] - output_diff[j])/fd_delta;
-            CheckNearZero(output_deriv[j], output[j], 1e-12);
-            CheckNearZero(output_2deriv[j], output[j], 1e-12);
-            CheckNearZero(output_deriv_pos_fd[j], output_pos_fd[j], 1e-12);
-            CheckNearZero(output_diff[j], fd_diff, sqrt(fd_delta));
-            CheckNearZero(output_diff_2deriv[j], fd_diff, sqrt(fd_delta));
-            CheckNearZero(output_diff2[j], fd_diff2, sqrt(fd_delta));
+            double fd_diff = (output_pos_fd(j)-output(j))/fd_delta;
+            double fd_diff2 = (output_diff_pos_fd(j) - output_diff(j))/fd_delta;
+            CheckNearZero(output_deriv(j), output(j), 1e-12);
+            CheckNearZero(output_2deriv(j), output(j), 1e-12);
+            CheckNearZero(output_deriv_pos_fd(j), output_pos_fd(j), 1e-12);
+            CheckNearZero(output_diff(j), fd_diff, sqrt(fd_delta));
+            CheckNearZero(output_diff_2deriv(j), fd_diff, sqrt(fd_delta));
+            CheckNearZero(output_diff2(j), fd_diff2, sqrt(fd_delta));
         }
     }
 }
 
-TEMPLATE_TEST_CASE("Sigmoid1d","[sigmoid1d]", SigmoidTypeSpace::Logistic) {
+using TestType1 = std::pair< SigmoidTypeSpace::Logistic, Kokkos::HostSpace>;
+
+#if defined(MPART_ENABLE_GPU)
+using TestType2 = std::pair< SigmoidTypeSpace::Logistic, DeviceSpace>;
+#else
+using TestType2 = std::pair< SigmoidTypeSpace::Logistic, std::false_type>;
+#endif
+
+TEMPLATE_TEST_CASE("Sigmoid1d","[sigmoid1d]", TestType1, TestType2) {
+if (!std::is_same_v<typename TestType::second_type, std::false_type>) { // Don't worry about testing the host twice
+
+    using SigmoidType = typename TestType::first_type;
+    using MemorySpace = std::conditional_t<std::is_same_v<typename TestType::second_type, std::false_type>, Kokkos::HostSpace, typename TestType::second_type>;
+    using ExecutionSpace = typename MemoryToExecution<MemorySpace>::Space;
     SECTION("Initialization") {
         Kokkos::View<double*, MemorySpace> centers("Sigmoid Centers", 2);
         Kokkos::View<double*, MemorySpace> widths("Sigmoid Widths", 3);
         Kokkos::View<double*, MemorySpace> weights("Sigmoid weights", 2);
 
-        CHECK_THROWS_AS((Sigmoid1d<MemorySpace,TestType>(centers, widths, weights)), std::invalid_argument);
-        CHECK_THROWS_AS((Sigmoid1d<MemorySpace,TestType>(centers, widths)), std::invalid_argument);
+        CHECK_THROWS_AS((Sigmoid1d<MemorySpace,SigmoidType>(centers, widths, weights)), std::invalid_argument);
+        CHECK_THROWS_AS((Sigmoid1d<MemorySpace,SigmoidType>(centers, widths)), std::invalid_argument);
         int N_wrong = 2+1+2+3+5;
         centers = Kokkos::View<double*, MemorySpace>("Sigmoid Centers", N_wrong);
         widths = Kokkos::View<double*, MemorySpace>("Sigmoid widths", N_wrong);
         weights = Kokkos::View<double*, MemorySpace>("Sigmoid weights", N_wrong);
-        CHECK_THROWS_AS((Sigmoid1d<MemorySpace,TestType>(centers, widths, weights)), std::invalid_argument);
-        CHECK_THROWS_AS((Sigmoid1d<MemorySpace,TestType>(centers, widths)), std::invalid_argument);
+        CHECK_THROWS_AS((Sigmoid1d<MemorySpace,SigmoidType>(centers, widths, weights)), std::invalid_argument);
+        CHECK_THROWS_AS((Sigmoid1d<MemorySpace,SigmoidType>(centers, widths)), std::invalid_argument);
         int N_wrong_arr[4] = {0, 1, 2+(1+3)};
         for(int N_wrong : N_wrong_arr) {
             centers = Kokkos::View<double*, MemorySpace>("Sigmoid Centers", N_wrong);
             widths = Kokkos::View<double*, MemorySpace>("Sigmoid widths", N_wrong);
             weights = Kokkos::View<double*, MemorySpace>("Sigmoid weights", N_wrong);
-            CHECK_THROWS_AS((Sigmoid1d<MemorySpace,TestType>(centers, widths, weights)), std::invalid_argument);
-            CHECK_THROWS_AS((Sigmoid1d<MemorySpace,TestType>(centers, widths)), std::invalid_argument);
+            CHECK_THROWS_AS((Sigmoid1d<MemorySpace,SigmoidType>(centers, widths, weights)), std::invalid_argument);
+            CHECK_THROWS_AS((Sigmoid1d<MemorySpace,SigmoidType>(centers, widths)), std::invalid_argument);
         }
         centers = Kokkos::View<double*, MemorySpace>("Sigmoid Centers", 2);
         widths = Kokkos::View<double*, MemorySpace>("Sigmoid widths", 2);
-        Sigmoid1d<MemorySpace,TestType> Sigmoid = Sigmoid1d<MemorySpace,TestType>(centers, widths);
+        Sigmoid1d<MemorySpace,SigmoidType> Sigmoid {centers, widths};
         CHECK(Sigmoid.GetOrder() == 1 + 2); // Affine + 2 edge terms
         centers = Kokkos::View<double*, MemorySpace>("Sigmoid Centers", 3);
         widths = Kokkos::View<double*, MemorySpace>("Sigmoid widths", 3);
-        Sigmoid = Sigmoid1d<MemorySpace,TestType>(centers, widths);
+        Sigmoid = Sigmoid1d<MemorySpace,SigmoidType>(centers, widths);
         CHECK(Sigmoid.GetOrder() == 1 + 2 + 1); // Affine + 2 edge terms + 1 sigmoid
     }
+    Kokkos::fence();
 
-    unsigned int N_grad_points = 100;
-    double fd_delta = 1e-6;
     const double support_bound = 100.;
 
     SECTION("Single Sigmoid") {
@@ -93,98 +120,131 @@ TEMPLATE_TEST_CASE("Sigmoid1d","[sigmoid1d]", SigmoidTypeSpace::Logistic) {
         Kokkos::View<double*, MemorySpace> center("Sigmoid Center", param_length);
         Kokkos::View<double*, MemorySpace> width("Sigmoid Width", param_length);
         Kokkos::View<double*, MemorySpace> weight("Sigmoid Weight", param_length);
-        for(int i = 0; i < param_length; i++) {
-            center(i) = 0; width(i) = 1; weight(i) = 1.;
-        }
-        Sigmoid1d<MemorySpace,TestType> Sigmoid (center, width, weight);
+        Kokkos::deep_copy(center, 0.);
+        Kokkos::deep_copy(width, 1.);
+        Kokkos::deep_copy(weight, 1.);
+
+        Sigmoid1d<MemorySpace,SigmoidType> Sigmoid (center, width, weight);
 
         // Ensure the sigmoid f is monotone over a grid, f(-infty) = 0, f(+infty) = 1, f(0) = 0.5
         const int num_pts_grid = 100;
         const double grid_bdry = 5.;
-        double eval_pts[num_pts_grid+3];
-        eval_pts[0] = -support_bound; eval_pts[1] = 0.; eval_pts[2] = support_bound;
-        for(int p = 0; p < num_pts_grid; p++) eval_pts[p+3] = -grid_bdry + p*2*grid_bdry/(num_pts_grid-1);
+
+        Kokkos::View<double*, MemorySpace> eval_pts_d {"Grid points device", num_pts_grid+3};
+        Kokkos::RangePolicy<ExecutionSpace> pts_policy {0, num_pts_grid};
+        Kokkos::parallel_for(pts_policy, KOKKOS_LAMBDA(unsigned int p){
+            if(p == 0) {
+                eval_pts_d(0) = -support_bound;
+                eval_pts_d(1) = 0.;
+                eval_pts_d(2) = support_bound;
+            }
+            eval_pts_d(p+3) = -grid_bdry + p*2*grid_bdry/(num_pts_grid-1);
+        });
+        auto eval_pts = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), eval_pts_d);
+        Kokkos::fence();
+
         double expect_output[(order+1)*3] = {
-            1., eval_pts[0], eval_pts[0], 0., 0.,
-            1., eval_pts[1], -std::log(2), std::log(2), 0.5,
-            1., eval_pts[2], 0., eval_pts[2], 1.};
-        double output[(order+1)*(num_pts_grid+3)];
-        
+            1., -support_bound, -support_bound,            0., 0.0,
+            1.,             0.,   -std::log(2),   std::log(2), 0.5,
+            1.,  support_bound,             0., support_bound, 1.0};
+
+        Kokkos::View<double*, MemorySpace> output_d {"Output storage device", (order+1)*(num_pts_grid+3)};
+        Kokkos::RangePolicy<ExecutionSpace> eval_policy {0, num_pts_grid+3};
+        Kokkos::parallel_for(eval_policy, KOKKOS_LAMBDA(unsigned int j){
+            Sigmoid.EvaluateAll(output_d.data() + j*(order+1), order, eval_pts_d(j));
+        });
+        Kokkos::fence();
+        Kokkos::View<double*, Kokkos::HostSpace> output = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_d);
+
         int j = 0;
         for(; j < 3; j++) {
-            Sigmoid.EvaluateAll(output+j*(order+1), order, eval_pts[j]);
             for(int i = 0; i < (order+1); i++) {
                 int idx = j*(order+1)+i;
-                REQUIRE_THAT(output[idx], WithinRel(expect_output[idx], 1e-12));
+                REQUIRE_THAT(output(idx), WithinRel(expect_output[idx], 1e-12));
             }
         }
+
         double prev = 0.;
         for(; j < num_pts_grid + 3; j++) {
-            Sigmoid.EvaluateAll(output+j*(order+1), order, eval_pts[j]);
-            CHECK(output[j*(order+1)  ] == 1.);
-            CHECK(output[j*(order+1)+1] == eval_pts[j]);
-            double next = output[j*(order+1)+3];
+            CHECK(output(j*(order+1)  ) == 1.);
+            CHECK(output(j*(order+1)+1) == eval_pts(j));
+            double next = output(j*(order+1)+3);
             CHECK(next > prev);
             prev = next;
         }
-        TestSigmoidGradients(Sigmoid, 100, 1e-7);
+        TestSigmoidGradients<MemorySpace>(Sigmoid, 100, 1e-7);
     }
 
     SECTION("Multiple Sigmoids") {
         const int num_sigmoids = 3;
         const int order = num_sigmoids+1+2;
         const int param_length = 2 + num_sigmoids*(num_sigmoids+1)/2;
-        Kokkos::View<double*, MemorySpace> center("Sigmoid Center", param_length);
-        Kokkos::View<double*, MemorySpace> width("Sigmoid Width", param_length);
-        Kokkos::View<double*, MemorySpace> weight("Sigmoid Weight", param_length);
+        Kokkos::View<double*, Kokkos::HostSpace> centers("Sigmoid centers", param_length);
+        Kokkos::View<double*, Kokkos::HostSpace> widths("Sigmoid widths", param_length);
+        Kokkos::View<double*, Kokkos::HostSpace> weights("Sigmoid weights", param_length);
         double edge_bound = 3.;
-        center(0) = -edge_bound; width(0) = 2*edge_bound/10; weight(0) = 1.;
-        center(1) =  edge_bound; width(1) = 2*edge_bound/10; weight(1) = 1.;
+        centers(0) = -edge_bound; widths(0) = 2*edge_bound/10; weights(0) = 1.;
+        centers(1) =  edge_bound; widths(1) = 2*edge_bound/10; weights(1) = 1.;
         int param_idx = 2;
         for(int curr_order = 1; curr_order <= num_sigmoids; curr_order++) {
             for(int i = 0; i < curr_order; i++) {
-                center(param_idx) = 4*(-(curr_order-1)/2 + i);
-                width(param_idx) = 1/((double)i+1);
-                weight(param_idx) = 1./curr_order;
+                centers(param_idx) = 4*(-(curr_order-1)/2 + i);
+                widths(param_idx) = 1/((double)i+1);
+                weights(param_idx) = 1./curr_order;
                 param_idx++;
             }
         }
-        Sigmoid1d<MemorySpace,TestType> Sigmoid (center, width, weight);
+        auto centers_d = Kokkos::create_mirror_view_and_copy(MemorySpace(), centers);
+        auto widths_d = Kokkos::create_mirror_view_and_copy(MemorySpace(), widths);
+        auto weights_d = Kokkos::create_mirror_view_and_copy(MemorySpace(), weights);
+        Sigmoid1d<MemorySpace,SigmoidType> Sigmoid {centers_d, widths_d, weights_d};
 
         // Ensure the sigmoid f is monotone over a grid, f(-infty) = 0, f(+infty) = 1
         const int num_pts_grid = 100;
         const double grid_bdry = 5.;
-        double eval_pts[num_pts_grid];
-        for(int p = 0; p < num_pts_grid; p++) {
-            eval_pts[p] = -grid_bdry + 2*p*grid_bdry/(num_pts_grid-1);
-        }
-        double output[order+1];
-        Sigmoid.EvaluateAll(output, order, -support_bound);
-        CHECK(output[0] == 1.);
-        CHECK(output[1] == -support_bound);
-        CHECK(output[2] == -(support_bound-edge_bound)*width(0));
-        CHECK(output[3] == 0.);
-        for(int i = 4; i <= order; i++) {
-            REQUIRE_THAT(output[i], WithinAbs(0., 1e-12));
+        Kokkos::View<double*,MemorySpace> eval_pts_d {"Grid points", num_pts_grid+2};
+        Kokkos::parallel_for(Kokkos::RangePolicy<ExecutionSpace>(0, num_pts_grid), KOKKOS_LAMBDA(int p) {
+            if(p == 0) {
+                eval_pts_d(0) = -support_bound;
+                eval_pts_d(1) = support_bound;
+            }
+            eval_pts_d(p+2) = -grid_bdry + 2*p*grid_bdry/(num_pts_grid-1);
+        });
+        auto eval_pts = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), eval_pts_d);
+        Kokkos::View<double*, MemorySpace> output_d {"Output storage device", (order+1)*(num_pts_grid+2)};
+        Kokkos::parallel_for(Kokkos::RangePolicy<ExecutionSpace>(0, num_pts_grid+2), KOKKOS_LAMBDA(int j) {
+            Sigmoid.EvaluateAll(output_d.data() + j*(order+1), order, eval_pts_d(j));
+        });
+        Kokkos::fence();
+        auto output = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_d);
+
+        double expect_output[5*2] = {
+            1., -support_bound, -(support_bound-edge_bound)*widths(0), 0.0, 0.,
+            1.,  support_bound,  0.,  (support_bound-edge_bound)*widths(1), 1.
+        };
+
+        for(int i = 0; i < 2; i++) {
+            for(int j = 0; j < 4; j++) {
+                CHECK(output(i*(order+1)+j) == expect_output[i*5 + j]);
+            }
+            for(int j = 4; j <= order; j++) {
+                REQUIRE_THAT(output(i*(order+1)+j), WithinAbs(expect_output[i*5+4], 1e-12));
+            }
         }
-        Sigmoid.EvaluateAll(output, order,  support_bound);
-        CHECK(output[0] == 1.);
-        CHECK(output[1] == support_bound);
-        CHECK(output[2] == 0.);
-        CHECK(output[3] == (support_bound-edge_bound)*width(1));
-        for(int i = 4; i <= order; i++) REQUIRE_THAT(output[i], WithinAbs(1., 1e-12));
+        unsigned int output_start_idx = 2*(order+1);
         double prev[order+1] = {0.};
         // set prev for left edge term to negative infty
         prev[2] = -2*support_bound;
         for(int j = 0; j < num_pts_grid; j++) {
-            Sigmoid.EvaluateAll(output, order, eval_pts[j]);
-            CHECK(output[0] == 1.);
-            CHECK(output[1] == eval_pts[j]);
-            for(int curr_sigmoid = 2; curr_sigmoid <= order; curr_sigmoid++) {
-                CHECK(output[curr_sigmoid] > prev[curr_sigmoid]);
-                prev[curr_sigmoid] = output[curr_sigmoid];
+            CHECK(output(output_start_idx + 0) == 1.); // Constant
+            CHECK(output(output_start_idx + 1) == eval_pts(j+2)); // Linear
+            for(int curr_sigmoid = 2; curr_sigmoid <= order; curr_sigmoid++) { // edge+sigmoids
+                double out_val = output(output_start_idx + curr_sigmoid);
+                CHECK(out_val > prev[curr_sigmoid]);
+                prev[curr_sigmoid] = out_val;
             }
+            output_start_idx += order+1;
         }
-        TestSigmoidGradients(Sigmoid, 100, 1e-7);
+        TestSigmoidGradients<MemorySpace>(Sigmoid, 100, 1e-7);
     }
-}
\ No newline at end of file
+}}
\ No newline at end of file
diff --git a/tests/Test_SummarizedMap.cpp b/tests/Test_SummarizedMap.cpp
index bd5e111d..17e963ec 100644
--- a/tests/Test_SummarizedMap.cpp
+++ b/tests/Test_SummarizedMap.cpp
@@ -41,7 +41,6 @@ TEST_CASE( "SummarizedMap", "[SummarizedMap_MonotoneComponent]" ) {
         sumMap->SetCoeffs(coeffs);
 
         // Now make sure that the coefficients of each block were set
-        unsigned int cumCoeffInd = 0;
         for(unsigned int i=0; i<sumMap->numCoeffs; ++i){
             CHECK(sumMap->Coeffs()(i) == 0.1*(i+1)); // Values of coefficients should be correct
             CHECK(sumMap->Coeffs()(i) == comp->Coeffs()(i)); // Values of coefficients should be equal to those of comp
diff --git a/tests/Test_TrainMap.cpp b/tests/Test_TrainMap.cpp
index a175a745..57e9c142 100644
--- a/tests/Test_TrainMap.cpp
+++ b/tests/Test_TrainMap.cpp
@@ -3,6 +3,7 @@
 #include "MParT/MapFactory.h"
 #include "MParT/MapObjective.h"
 #include "MParT/TrainMap.h"
+#include "MParT/Utilities/KokkosSpaceMappings.h"
 #include "MParT/Distributions/GaussianSamplerDensity.h"
 
 // For testing the normality of the pushforward
@@ -11,17 +12,19 @@
 using namespace mpart;
 using namespace Catch;
 
+using MemorySpace = Kokkos::HostSpace;
+
 TEST_CASE("Test_TrainMap", "[TrainMap]") {
     unsigned int seed = 42;
     unsigned int dim = 2;
     unsigned int numPts = 5000;
     unsigned int testPts = numPts / 5;
-    auto sampler = std::make_shared<GaussianSamplerDensity<Kokkos::HostSpace>>(3);
+    auto sampler = std::make_shared<GaussianSamplerDensity<MemorySpace>>(3);
     sampler->SetSeed(seed);
     auto samples = sampler->Sample(numPts);
     Kokkos::View<double**, Kokkos::HostSpace> targetSamples("targetSamples", 3, numPts);
-    double max = 0;
-    Kokkos::parallel_for("Banana", numPts, KOKKOS_LAMBDA(const unsigned int i) {
+    Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy {0u, numPts};
+    Kokkos::parallel_for("Banana", policy, KOKKOS_LAMBDA(const unsigned int i) {
         targetSamples(0,i) = samples(0,i);
         targetSamples(1,i) = samples(1,i);
         targetSamples(2,i) = samples(2,i) + samples(1,i)*samples(1,i);
diff --git a/tests/Test_TrainMapAdaptive.cpp b/tests/Test_TrainMapAdaptive.cpp
index b3c54acd..f3a75867 100644
--- a/tests/Test_TrainMapAdaptive.cpp
+++ b/tests/Test_TrainMapAdaptive.cpp
@@ -10,6 +10,7 @@ using namespace Catch;
 
 #include "MParT/Utilities/LinearAlgebra.h"
 #include "Distributions/Test_Distributions_Common.h"
+using MemorySpace = Kokkos::HostSpace;
 
 void NormalizeSamples(StridedMatrix<double, Kokkos::HostSpace> mat) {
     using MemorySpace = Kokkos::HostSpace;
@@ -154,7 +155,8 @@ TEST_CASE("Adaptive Transport Map","[ATM]") {
     // }
     SECTION("TraditionalBananaOneComp") {
         Kokkos::View<double**, Kokkos::HostSpace> targetSamples("targetSamples", 2, numPts);
-        Kokkos::parallel_for("Intializing targetSamples", numPts, KOKKOS_LAMBDA(const unsigned int i){
+        Kokkos::RangePolicy<typename MemoryToExecution<MemorySpace>::Space> policy {0u, numPts};
+        Kokkos::parallel_for("Intializing targetSamples", policy, KOKKOS_LAMBDA(const unsigned int i){
             targetSamples(0,i) = samples(0,i);
             targetSamples(1,i) = samples(1,i) + samples(0,i)*samples(0,i);
         });
diff --git a/tests/Test_TriangularMap.cpp b/tests/Test_TriangularMap.cpp
index 2e2b6940..3bb2c18d 100644
--- a/tests/Test_TriangularMap.cpp
+++ b/tests/Test_TriangularMap.cpp
@@ -583,7 +583,6 @@ TEST_CASE( "Testing TriangularMap made using CreateSingleEntryMap", "[Triangular
         triMap->SetCoeffs(coeffs);
 
         // Now make sure that the coefficients of each block were set
-        unsigned int cumCoeffInd = 0;
         for(unsigned int i=0; i<triMap->numCoeffs; ++i){
                 CHECK(comp->Coeffs()(i) == triMap->Coeffs()(i)); // Values of coefficients should be equal
                 CHECK(&comp->Coeffs()(i) == &triMap->Coeffs()(i)); // Memory location should also be the same (no copy)
@@ -604,9 +603,6 @@ TEST_CASE( "Testing TriangularMap made using CreateSingleEntryMap", "[Triangular
 
     SECTION("Evaluation"){
 
-        unsigned int start = 0;
-
-
         auto inTop = Kokkos::subview(in, std::make_pair(0, int(activeInd-1)), Kokkos::ALL());
         auto inTopAndMid = Kokkos::subview(in, std::make_pair(0, int(activeInd)), Kokkos::ALL());
         auto inBot = Kokkos::subview(in, std::make_pair(int(activeInd), int(dim)), Kokkos::ALL());
diff --git a/tests/Test_UnivariateExpansion.cpp b/tests/Test_UnivariateExpansion.cpp
index 6e401632..f3772352 100644
--- a/tests/Test_UnivariateExpansion.cpp
+++ b/tests/Test_UnivariateExpansion.cpp
@@ -88,7 +88,6 @@ TEST_CASE("UnivariateExpansion") {
         StridedMatrix<double, MemorySpace> logDetGrad = expansion.LogDeterminantInputGrad(points);
         REQUIRE(logDetGrad.extent(0) == 1);
         REQUIRE(logDetGrad.extent(1) == numPts);
-        unsigned int counter = 0;
         for(int i = 0; i < numPts; i++) {
             double df = 0.;
             double d2f = 0.;

From 66dbc3dcd7c3d95853aa91c5eaace3a5da3fe3bd Mon Sep 17 00:00:00 2001
From: Daniel <43151183+dannys4@users.noreply.github.com>
Date: Mon, 1 Apr 2024 17:10:20 -0400
Subject: [PATCH 7/8] Fix GitHub CI on release (#405)

---
 .github/workflows/build-bindings.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-bindings.yml b/.github/workflows/build-bindings.yml
index 4856f39c..1ab32340 100644
--- a/.github/workflows/build-bindings.yml
+++ b/.github/workflows/build-bindings.yml
@@ -48,7 +48,7 @@ jobs:
 
       - name: Setup Julia
         run: |
-          julia -e "using Pkg; Pkg.add([\"CxxWrap\",\"TestReports\"])"
+          julia -e "using Pkg; Pkg.add([Pkg.PackageSpec(;name=\"CxxWrap\",version=v\"0.14.2\"),Pkg.PackageSpec(;name=\"TestReports\")])"
           export GITHUB_JULIA_PATH=$(julia -e "println(DEPOT_PATH[1])")
           echo -n $'[bee5971c-294f-5168-9fcd-9fb3c811d495]\nMParT = \"' >> $GITHUB_JULIA_PATH/artifacts/Overrides.toml
           echo -n $GITHUB_WORKSPACE                                     >> $GITHUB_JULIA_PATH/artifacts/Overrides.toml

From 1ae526b6c53a3a70d9ee9e9bd636afd4e3fca584 Mon Sep 17 00:00:00 2001
From: Daniel <43151183+dannys4@users.noreply.github.com>
Date: Tue, 2 Apr 2024 07:02:46 -0400
Subject: [PATCH 8/8] Fix issue 286 (#403)

* Fix issue 286

* Add binding change
---
 bindings/python/src/MultiIndex.cpp          | 32 +++++++++++++++++++++
 bindings/python/tests/test_MultiIndexSet.py |  8 ++++++
 2 files changed, 40 insertions(+)

diff --git a/bindings/python/src/MultiIndex.cpp b/bindings/python/src/MultiIndex.cpp
index d97c9614..2e01bcd0 100644
--- a/bindings/python/src/MultiIndex.cpp
+++ b/bindings/python/src/MultiIndex.cpp
@@ -24,6 +24,37 @@
 
 namespace py = pybind11;
 using namespace mpart::binding;
+template<typename Scalar_T>
+using Matrix_Map_T = Eigen::Map<Eigen::Matrix<Scalar_T,Eigen::Dynamic,Eigen::Dynamic>, 0, Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>>;
+
+mpart::MultiIndexSet MultiIndexSet_PyBuffer(py::buffer x){
+    constexpr bool rowMajor = Eigen::Matrix<int, Eigen::Dynamic, Eigen::Dynamic>::Flags & Eigen::RowMajorBit;
+
+    py::buffer_info info = x.request();
+
+    // Check for int32, int64
+    bool is_int32 = info.format == py::format_descriptor<int32_t>::format();
+    bool is_int64 = info.format == "l"; // This is based on a pybind bug; numpy int64 buffer is l, not q
+    if (!(is_int32 || is_int64))
+        throw std::runtime_error("Incompatible format: expected an array of either int32 or int64!");
+
+    if (info.ndim != 2)
+        throw std::runtime_error("Expected array with ndims = 2");
+
+    int stride_size = is_int32 ? sizeof(int32_t) : sizeof(int64_t);
+    Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic> strides(
+        info.strides[rowMajor ? 0 : 1] / (py::ssize_t)stride_size,
+        info.strides[rowMajor ? 1 : 0] / (py::ssize_t)stride_size
+        );
+
+    if(is_int64) { // Is int64
+        Matrix_Map_T<int64_t> map_64 (static_cast<int64_t*>(info.ptr), info.shape[0], info.shape[1], strides);
+        return mpart::MultiIndexSet {map_64.cast<int32_t>()};
+    } else { // Is int32
+        Matrix_Map_T<int32_t> map (static_cast<int32_t*>(info.ptr), info.shape[0], info.shape[1], strides);
+        return mpart::MultiIndexSet {map};
+    }
+}
 
 void mpart::binding::MultiIndexWrapper(py::module &m)
 {
@@ -111,6 +142,7 @@ void mpart::binding::MultiIndexWrapper(py::module &m)
     // MultiIndexSet
     py::class_<MultiIndexSet, std::shared_ptr<MultiIndexSet>>(m, "MultiIndexSet")
         .def(py::init<const unsigned int>())
+        .def(py::init<std::function<MultiIndexSet(py::buffer)>>(&MultiIndexSet_PyBuffer))
         .def(py::init<Eigen::Ref<const Eigen::MatrixXi> const&>())
         .def("fix", &MultiIndexSet::Fix)
         .def("__len__", &MultiIndexSet::Length, "Retrieves the length of _each_ multiindex within this set (i.e. the dimension of the input)")
diff --git a/bindings/python/tests/test_MultiIndexSet.py b/bindings/python/tests/test_MultiIndexSet.py
index 9b70ec4f..85fa4f3d 100644
--- a/bindings/python/tests/test_MultiIndexSet.py
+++ b/bindings/python/tests/test_MultiIndexSet.py
@@ -10,6 +10,14 @@
 msetTensorProduct = mpart.MultiIndexSet.CreateTensorProduct(3,4,noneLim)
 msetTotalOrder = mpart.MultiIndexSet.CreateTotalOrder(3,4,noneLim)
 
+def test_create():
+    mset_one = mpart.MultiIndexSet([[2]])
+    assert mset_one.Size() == 1
+    assert len(mset_one) == 1
+    mset_one = mpart.MultiIndexSet(np.array([[2]]))
+    assert mset_one.Size() == 1
+    assert len(mset_one) == 1
+
 def test_max_degrees():
 
     assert np.all(msetFromArray.MaxOrders() == [2,1])