diff --git a/.gitignore b/.gitignore index 105cefc..0fdc5a8 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,6 @@ doc/sphinx/_build build/ dist/ pyoptmat.egg-info/ +examples/ode/results.txt +examples/ode/log +results.txt diff --git a/doc/sphinx/chunktime.rst b/doc/sphinx/chunktime.rst new file mode 100644 index 0000000..d796478 --- /dev/null +++ b/doc/sphinx/chunktime.rst @@ -0,0 +1,7 @@ +pyoptmat.chunktime: utilities for blocked time integration +---------------------------------------------------------- + +.. automodule:: pyoptmat.chunktime + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/sphinx/conf.py b/doc/sphinx/conf.py index 63d08bc..c04b224 100644 --- a/doc/sphinx/conf.py +++ b/doc/sphinx/conf.py @@ -22,7 +22,7 @@ author = 'Argonne National Laboratory' # The full version, including alpha/beta/rc tags -release = '1.1.3' +release = '1.2.0' # -- General configuration --------------------------------------------------- diff --git a/doc/sphinx/examples.rst b/doc/sphinx/examples.rst index 390b86a..5932eeb 100644 --- a/doc/sphinx/examples.rst +++ b/doc/sphinx/examples.rst @@ -28,6 +28,14 @@ repeats the :doc:`tutorial `, but using synthetic cyclic test data a :py:class:`pyoptmat.hardening.ChabocheHardeningModel` viscoplastic model capable of capturing kinematic hardening behavior. +Neural ODE example +------------------ + +Inferring the response of a coupled mass-spring-damper system using +trajectory data with (1) a physical ODE model and (2) a neural ODE model. + +.. literalinclude:: /../../examples/ode/damping.py + Implicit versus explicit integration ------------------------------------ @@ -57,6 +65,13 @@ performance of both approaches. .. literalinclude:: /../../examples/ode/trajectory.py +Hodgkin-Huxley coupled neuron model +----------------------------------- + +An example of a scalable system of ODEs for benchmarking. + +.. literalinclude:: /../../examples/ode/neuron.py + Temperature dependent parameters -------------------------------- diff --git a/doc/sphinx/index.rst b/doc/sphinx/index.rst index e4dc81a..56a62be 100644 --- a/doc/sphinx/index.rst +++ b/doc/sphinx/index.rst @@ -40,6 +40,8 @@ pyoptmat features uncertain data. - Efficient backward pass/gradient calculation using the adjoint method. This approach vastly outperforms automatic differentiation for time series data. +- Blocked time integration for both the forward and backward/adjoint passes, which + vectorizes/parallelizes integrating ODEs through time. - Implicit time integration algorithms suitable for material models represented as stiff systems of ordinary differential equations. - Prebuilt model components aimed at high temperature structural materials.
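A minimal sketch (not part of the diff) of how the blocked time integration feature listed above is invoked, assuming the forward(t, y) -> (ydot, J) model convention and the ode.odeint(..., block_size=...) signature used by the example scripts later in this change; the Decay model and the batch, time, and block sizes are hypothetical and purely illustrative::

    import torch
    from pyoptmat import ode

    class Decay(torch.nn.Module):
        # Hypothetical scalar model dy/dt = -y, returning the (rate, Jacobian)
        # pair the pyoptmat integrators expect
        def forward(self, t, y):
            ydot = -y
            J = torch.zeros(y.shape + y.shape[-1:], device=y.device)
            J[..., 0, 0] = -1.0
            return ydot, J

    nbatch, ntime = 10, 100
    y0 = torch.ones(nbatch, 1)
    times = torch.linspace(0, 1, ntime).unsqueeze(-1).expand(ntime, nbatch)
    # block_size sets how many time steps are vectorized and solved together
    result = ode.odeint(Decay(), y0, times, method="backward-euler", block_size=25)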
@@ -78,6 +80,7 @@ The following are links to the complete API descriptions of each pyoptmat submod optimize experiments ode + chunktime solvers models flowrules diff --git a/doc/sphinx/refs.bib b/doc/sphinx/refs.bib index d5f6468..a937b13 100644 --- a/doc/sphinx/refs.bib +++ b/doc/sphinx/refs.bib @@ -38,3 +38,12 @@ @article{chaboche1989unified volume={111}, pages={424--430} } + +@article{schwemmer2012theory, + title={The theory of weakly coupled oscillators}, + author={Schwemmer, Michael A and Lewis, Timothy J}, + journal={Phase response curves in neuroscience: Theory, experiment, and analysis}, + pages={3--31}, + year={2012}, + publisher={Springer} +} diff --git a/examples/ode/damping.py b/examples/ode/damping.py new file mode 100755 index 0000000..a4aa80f --- /dev/null +++ b/examples/ode/damping.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python3 + +""" +Sample problem trying to infer the response of a chain of serially +connected mass-spring-dashpot 1D elements, fixed at one end of the +chain and subject to a force on the other end. + +The number of elements in the chain is `n_chain`. + +This example +1. Generates `n_samples` trajectories under a random force described by + ... We separate out only the end displacements as observable; + all the interior displacements are treated as hidden state. +2. Tries to infer the mass/spring/dashpot constants and the system response + using the analytical ODE as a model, given the end-displacement + trajectories as input. +3. Again tries to infer the response from the end displacement trajectories, + but now using a neural ODE as the model. + +The integration uses `n_time` time points. +""" + +import itertools + +import torch +from torch import nn + +from functorch import vmap, jacrev + +import tqdm +import matplotlib.pyplot as plt + +import sys +sys.path.append("../..") +from pyoptmat import ode, utility, neuralode + +if torch.cuda.is_available(): + dev = "cuda:0" +else: + dev = "cpu" +device = torch.device(dev) + +# I'm assuming single precision will be fine for this but for now use doubles +torch.set_default_tensor_type(torch.DoubleTensor) + +class MassDamperSpring(torch.nn.Module): + """ + Mass, spring, damper system of arbitrary size + + I use simple substitution to solve for the velocity and displacement + in a first order system, so the size of the system is twice the + number of elements.
+ + Args: + K (torch.tensor): (n_chain,) vector of stiffnesses + C (torch.tensor): (n_chain,) vector of dampers + M (torch.tensor): (n_chain,) vector of masses + force (function): gives f(t) + """ + def __init__(self, K, C, M, force, C_scale = 1.0e-4, M_scale = 1.0e-5): + super().__init__() + + self.K = torch.nn.Parameter(K) + self.C = torch.nn.Parameter(C) + self.M = torch.nn.Parameter(M) + self.force = force + + self.C_scale = C_scale + self.M_scale = M_scale + + self.half_size = K.shape[0] + self.size = 2 * self.half_size + + def forward(self, t, y): + """ + Return the state rate + + Args: + t (torch.tensor): (nchunk, nbatch) tensor of times + y (torch.tensor): (nchunk, nbatch, size) tensor of state + + Returns: + y_dot: (nchunk, nbatch, size) tensor of state rates + J: (nchunk, nbatch, size, size) tensor of Jacobians + """ + ydot = torch.zeros_like(y) + J = torch.zeros(y.shape + (self.size,), device = t.device) + + # Separate out for convenience + u = y[...,:self.half_size] + v = y[...,self.half_size:] + + # Differences + du = torch.diff(u, dim = -1) + dv = torch.diff(v, dim = -1) + + # Scaled properties + M = self.M * self.M_scale + C = self.C * self.C_scale + + # Rate + # Velocity + ydot[...,:self.half_size] = v + + # Springs + ydot[...,self.half_size:-1] += self.K[...,:-1] * du / M[...,:-1] + ydot[...,self.half_size+1:] += -self.K[...,:-1] * du / M[...,1:] + ydot[...,-1] += -self.K[...,-1] * u[...,-1] / M[...,-1] + ydot[...,self.half_size] += self.force(t) / M[...,0] + + # Dampers + ydot[...,self.half_size:-1] += C[...,:-1] * dv / M[...,:-1] + ydot[...,self.half_size+1:] += -C[...,:-1] * dv / M[...,1:] + ydot[...,-1] += -C[...,-1] * v[...,-1] / M[...,-1] + + # Derivative + # Velocity + J[...,:self.half_size,self.half_size:] += torch.eye(self.half_size, device = t.device) + + # Springs + J[...,self.half_size:,:self.half_size] += torch.diag_embed(-self.K / M) + J[...,self.half_size+1:,1:self.half_size] += torch.diag_embed(-self.K[...,:-1] / M[...,1:]) + J[...,self.half_size:,:self.half_size] += torch.diag_embed(self.K[...,:-1] / M[...,:-1], offset = 1) + J[...,self.half_size:,:self.half_size] += torch.diag_embed(self.K[...,:-1] / M[...,1:], offset = -1) + + # Dampers + J[...,self.half_size:,self.half_size:] += torch.diag_embed(-C / M) + J[...,self.half_size+1:,self.half_size+1:] += torch.diag_embed(-C[...,:-1] / M[...,1:]) + J[...,self.half_size:,self.half_size:] += torch.diag_embed(C[...,:-1] / M[...,:-1], offset = 1) + J[...,self.half_size:,self.half_size:] += torch.diag_embed(C[...,:-1] / M[...,1:], offset = -1) + + return ydot, J + + def initial_condition(self, nsamples): + """ + I don't want to learn the initial condition (we could) + so just return deterministic zeros + """ + return torch.zeros(nsamples, self.size, device = device) + +def neural_ode_factory(n_hidden, n_layers, n_inter): + """Make a neural ODE to try + + Args: + n_hidden (int): number of hidden state variables + n_layers (int): network depth + n_inter (int): size of hidden layers + """ + n_in = n_hidden + 2 + n_out = n_hidden + 1 + n_inter += 1 + + mods = [nn.Linear(n_in, n_inter), nn.ReLU() + ] + list(itertools.chain(*[[nn.Linear(n_inter, n_inter), nn.ReLU()] for i in range(n_layers)]) + ) + [nn.Linear(n_inter, n_out)] + + return nn.Sequential(*mods) + +def random_walk(time, mean, scale, mag): + """ + Simulate a random walk with velocity of the given mean and scale + """ + results = torch.zeros_like(time) + for i in range(1, time.shape[0]): + dt = time[i] - time[i-1] + v = mag*torch.normal(mean, scale) + 
results[i] = results[i-1] + v * dt + + return results + +if __name__ == "__main__": + # Basic parameters + n_chain = 5 # Number of spring-dashpot-mass elements + n_samples = 5 # Number of random force samples + n_time = 500 # Number of time steps + integration_method = 'backward-euler' + + # Time chunking -- best value may vary on your system + n_chunk = 100 + + # Ending time + t_end = 1.0 + + # Mean and standard deviation for random force "velocity" + force_mean = torch.tensor(0.0, device = device) + force_std = torch.tensor(1.0, device = device) + force_mag = torch.tensor(1.0, device = device) + + # True values of the mass, damping, and stiffness + K = torch.rand(n_chain, device = device) + C = torch.rand(n_chain, device = device) + M = torch.rand(n_chain, device = device) + + # Time values + time = torch.linspace(0, t_end, n_time, device = device).unsqueeze(-1).expand(n_time, n_samples) + + # 1. Generate the data + + # Generate the force values + force = random_walk(time, force_mean.unsqueeze(0).expand(n_samples), force_std.unsqueeze(0).expand(n_samples), force_mag.unsqueeze(0).expand(n_samples)) + + # Plot them + plt.figure() + plt.plot(time.cpu().numpy(), force.cpu().numpy()) + plt.show() + + # Interpolate in time + force_continuous = utility.ArbitraryBatchTimeSeriesInterpolator(time, force) + + # Setup the ground truth model + model = MassDamperSpring(K, C, M, force_continuous) + + # Generate the initial condition + y0 = model.initial_condition(n_samples) + + # Generate the data + with torch.no_grad(): + y_data = ode.odeint(model, y0, time, method = integration_method, block_size = n_chunk) + + # The observations will just be the first entry + observable = y_data[...,0] + + # Plot them + plt.figure() + plt.plot(time.cpu().numpy(), observable.cpu().numpy()) + plt.show() + + # 2. Infer with the actual model + ode_model = MassDamperSpring(torch.rand(n_chain, device = device), + torch.rand(n_chain, device = device), + torch.rand(n_chain, device = device), force_continuous) + + # Training parameters... + niter = 1000 + lr = 1.0e-3 + loss = torch.nn.MSELoss(reduction = "sum") + + # Optimization setup + optim = torch.optim.Adam(ode_model.parameters(), lr) + def closure(): + optim.zero_grad() + pred = ode.odeint_adjoint(ode_model, y0, time, method = integration_method, block_size = n_chunk) + obs = pred[...,0] + lossv = loss(obs, observable) + lossv.backward() + return lossv + + # Optimization loop + t = tqdm.tqdm(range(niter), total = niter, desc = "Loss: ") + loss_history = [] + for i in t: + closs = optim.step(closure) + loss_history.append(closs.detach().cpu().numpy()) + t.set_description("Loss: %3.2e" % loss_history[-1]) + + # Plot the loss history + plt.figure() + plt.plot(loss_history) + plt.show() + + # Make the final predictions + with torch.no_grad(): + ode_preds = ode.odeint(ode_model, y0, time, method = integration_method, block_size = n_chunk) + ode_obs = ode_preds[...,0] + + # Make a plot + plt.figure() + for i in range(n_samples): + l, = plt.plot(time.cpu().numpy()[:,i], observable.cpu().numpy()[:,i]) + plt.plot(time.cpu().numpy()[:,i], ode_obs.cpu().numpy()[:,i], ls = '--', color = l.get_color()) + plt.show() + + # 2. 
Infer with a neural ODE + # Setup the model + n_hidden = n_chain # I don't know, seems reasonable + n_layers = 3 + n_inter = n_chain # Same thing, seems reasonable + + nn = neural_ode_factory(n_hidden, n_layers, n_inter).to(device) + y0 = torch.zeros(n_samples, n_hidden+1, device = device) + + nn_model = neuralode.NeuralODE(nn, lambda t: force_continuous(t).unsqueeze(-1)) + + # Training parameters + niter = 1000 + lr = 1.0e-3 + loss = torch.nn.MSELoss(reduction = "sum") + + # Optimization setup + optim = torch.optim.Adam(nn_model.parameters(), lr) + def closure(): + optim.zero_grad() + pred = ode.odeint_adjoint(nn_model, y0, time, method = integration_method, block_size = n_chunk) + obs = pred[...,0] + lossv = loss(obs, observable) + lossv.backward() + return lossv + + # Optimization loop + t = tqdm.tqdm(range(niter), total = niter, desc = "Loss: ") + loss_history = [] + for i in t: + closs = optim.step(closure) + loss_history.append(closs.detach().cpu().numpy()) + t.set_description("Loss: %3.2e" % loss_history[-1]) + + # Plot the loss history + plt.figure() + plt.plot(loss_history) + plt.show() + + # Make the final predictions + with torch.no_grad(): + nn_preds = ode.odeint(nn_model, y0, time, method = integration_method, block_size = n_chunk) + nn_obs = nn_preds[...,0] + + # Make a plot + plt.figure() + for i in range(n_samples): + l, = plt.plot(time.cpu().numpy()[:,i], observable.cpu().numpy()[:,i]) + plt.plot(time.cpu().numpy()[:,i], nn_obs.cpu().numpy()[:,i], ls = '--', color = l.get_color()) + plt.show() + + # Compare all three + plt.figure() + for i in range(n_samples): + l, = plt.plot(time.cpu().numpy()[:,i], observable.cpu().numpy()[:,i]) + plt.plot(time.cpu().numpy()[:,i], ode_obs.cpu().numpy()[:,i], ls = '--', color = l.get_color()) + plt.plot(time.cpu().numpy()[:,i], nn_obs.cpu().numpy()[:,i], ls = ':', color = l.get_color()) + plt.show() diff --git a/examples/ode/neuron.py b/examples/ode/neuron.py index f3da48a..7f745ef 100755 --- a/examples/ode/neuron.py +++ b/examples/ode/neuron.py @@ -1,16 +1,29 @@ #!/usr/bin/env python +""" + This example integrates a more complicated system of ODEs: + a model of coupled neurons responding to a cyclic + electrical current (the Hodgkin and Huxley model, as + described by :cite:`schwemmer2012theory`). + + The number of neurons, current cycles, time steps per + cycle, vectorized time steps, etc. can be customized. + This is a decent benchmark problem for examining the performance + of pyoptmat on your machine. 
+""" + +import sys +sys.path.append('../..') + import torch import torch.nn as nn -import torch.jit -from torch.profiler import ProfilerActivity - import matplotlib.pyplot as plt from pyoptmat import ode, experiments, utility import time +# Use doubles torch.set_default_tensor_type(torch.DoubleTensor) # Select device to run on @@ -22,6 +35,20 @@ def driving_current(I_max, R, rate, thold, chold, Ncycles, nload, nhold, neq): + """ + Setup cyclic current histories + + Args: + I_max (tuple): (min,max) uniform distribution of max currents + R (tuple): (min,max) uniform distribution of load ratios + rate (tuple): (min, max) uniform distribution of current rates + thold (tuple): (min, max) uniform distribution of hold at max current times + chold (tuple): (min, max) uniform distribution of hold at min current times + Ncycles (int): number of load cycles + nload (int): number of time steps for current ramp + nhold (int): number of time steps for current holds + neq (int): number of independent current histories + """ Ntotal = (4*nload + 2*nhold) * Ncycles + 1 times = torch.empty((Ntotal,neq)) I = torch.empty((Ntotal,neq)) @@ -33,14 +60,57 @@ def driving_current(I_max, R, rate, thold, chold, Ncycles, nload, times[:,i] = torch.tensor(ti) I[:,i] = torch.tensor(Ii) - return utility.BatchTimeSeriesInterpolator( - times.to(device), I.to(device)), times.to(device) + return utility.ArbitraryBatchTimeSeriesInterpolator( + times.to(device), I.to(device)), times.to(device), I.to(device) class HodgkinHuxleyCoupledNeurons(torch.nn.Module): """ - From Schwemmer and Lewis 2012 + As described by :cite:`schwemmer2012theory` + + .. math:: + + F(y) = \\begin{bmatrix} F_1(y_1) + \\\\ F_2(y_2) + \\\\ \\vdots + \\\\ F_n(y_n) + \\end{bmatrix} + + with + + .. math:: + + y = \\begin{bmatrix} y_1 + \\\ y_2 + \\\ \\vdots + \\\ y_n + \\end{bmatrix} + + and + + .. math:: + + F_i(y_i) = \\begin{bmatrix} V_i + \\\\ m_i + \\\\ h_i + \\\\ n_i + \\end{bmatrix} + + and + + .. math:: + + F_i(y_i) = \\begin{bmatrix} \\frac{1}{C_i}(-g_{Na,i}m_i^3h_i(V_i-E_{Na,i})-g_{K,i}n_i^4(V_i-E_{k,i})-g_{L,i}(V_i-E_{L,i})+I(t) + \\Delta V_{ij}) + \\\\ \\frac{m_{\\infty,i}-m_i}{\\tau_{m,i}} + \\\\ \\frac{h_{\\infty,i}-h_i}{\\tau_{h,i}} + \\\\ \\frac{n_{\\infty,i}-n_i}{\\tau_{n,i}} + \\end{bmatrix} + + with + + .. math:: + + \\Delta V_{ij} = \\sum_{j=1}^{n_{neurons}}\\frac{g_{C,i}(V_i-V_j)}{C_i} - dX / dt = """ def __init__(self, C, g_Na, E_Na, g_K, E_K, g_L, E_L, m_inf, tau_m, h_inf, tau_h, n_inf, tau_n, g_C, I): @@ -66,12 +136,23 @@ def __init__(self, C, g_Na, E_Na, g_K, E_K, g_L, E_L, m_inf, self.n_equations = self.n_neurons * 4 def forward(self, t, y): + """ + Evaluate the system of ODEs at a particular state + + Args: + t (torch.tensor): current time + y (torch.tensor): current state + + Returns: + y_dot (torch.tensor): current ODEs + J (torch.tensor): current jacobian + """ Ic = self.I(t) - V = y[...,0::4] - m = y[...,1::4] - h = y[...,2::4] - n = y[...,3::4] + V = y[...,0::4].clone() + m = y[...,1::4].clone() + h = y[...,2::4].clone() + n = y[...,3::4].clone() ydot = torch.zeros_like(y) @@ -102,9 +183,9 @@ def forward(self, t, y): -self.g_K[None,...]*n**4.0)) # Coupling term J[...,0::4,0::4] -= self.g_C[None,...] / self.C[None,...] 
- J[...,0::4,0::4] += torch.repeat_interleave(torch.eye(self.n_neurons, - device = ydot.device).unsqueeze(0), ydot.shape[0], 0 - ) * torch.sum(self.g_C / self.C) + + J[...,0::4,0::4] += torch.eye(self.n_neurons, device = ydot.device).expand( + *ydot.shape[:-1], -1, -1) * torch.sum(self.g_C / self.C) # V, m J[...,0::4,1::4] = torch.diag_embed(-1.0 / self.C[None,...] * ( @@ -129,15 +210,20 @@ def forward(self, t, y): return ydot, J - - if __name__ == "__main__": - nbatch = 1000 + # Batch size + nbatch = 100 + # Time chunk size + time_block = 50 + # Number of neurons neq = 10 + # Number of cycles N = 5 + # Number of time steps per cycle n = 40 - current, times = driving_current([0.1,1], + # Setup the driving current + current, times, discrete_current = driving_current([0.1,1], [-1,0.9], [0.5,1.5], [0,1], @@ -147,6 +233,12 @@ def forward(self, t, y): n, nbatch) + plt.plot(times.cpu().numpy()[:,::4], discrete_current.cpu().numpy()[:,::4]) + plt.xlabel("Time") + plt.ylabel("Current") + plt.show() + + # Setup model and initial conditions nsteps = times.shape[0] model = HodgkinHuxleyCoupledNeurons( @@ -171,14 +263,14 @@ def forward(self, t, y): # Include a backward pass to give a better example of timing t1 = time.time() - res_imp = ode.odeint_adjoint(model, y0, times, iterative_linear_solver = True) + res_imp = ode.odeint_adjoint(model, y0, times, block_size = time_block) loss = torch.norm(res_imp) loss.backward() etime = time.time() - t1 - print("%i batch size, %i neurons, %i time steps: %f s" % (nbatch, neq, nsteps, etime)) + print("%i batch size, %i blocked time steps, %i neurons, %i time steps: %f s" % (nbatch, time_block, neq, nsteps, etime)) - #plt.plot(times[:,0].detach().cpu().numpy(), res_imp[:,0,0::4].detach().cpu().numpy()) - #plt.xlabel("Time") - #plt.ylabel("Voltage") - #plt.show() + plt.plot(times[:,0].detach().cpu().numpy(), res_imp[:,0,0::4].detach().cpu().numpy()) + plt.xlabel("Time") + plt.ylabel("Voltage") + plt.show() diff --git a/examples/ode/trajectory.py b/examples/ode/trajectory.py index 7b187c4..fbf1e50 100755 --- a/examples/ode/trajectory.py +++ b/examples/ode/trajectory.py @@ -63,10 +63,6 @@ # Prior for the noise eps_prior = 0.1 # Just measure variance in data... 
-# If true use torch JIT mode -jit_mode = False - - def model_act(times): """ times: ntime x nbatch @@ -92,20 +88,20 @@ def model_act(times): class Integrator(pyro.nn.PyroModule): - def __init__(self, eqn, y0, extra_params=[]): + def __init__(self, eqn, y0, extra_params=[], block_size = 1): super().__init__() self.eqn = eqn self.y0 = y0 self.extra_params = extra_params + self.block_size = block_size def forward(self, times): return ode.odeint_adjoint( self.eqn, self.y0, times, - jit_mode=jit_mode, + block_size = self.block_size, extra_params=self.extra_params, - atol=1e-4, ) @@ -123,7 +119,7 @@ def forward(self, t, y): f[..., 1] = self.v * torch.sin(self.a) - g * t # Nice ODE lol - df = torch.zeros(y.shape + y.shape[1:]) + df = torch.zeros(y.shape + y.shape[-1:]) return f, df @@ -256,6 +252,9 @@ def gen_extra(self): tmax = 20.0 tnum = 100 + # Number of vectorized time steps to evaluate at once + time_block = 50 + time = torch.linspace(0, tmax, tnum) times = torch.empty(tnum, nsamples) data = torch.empty(tnum, nsamples, 2) @@ -275,7 +274,8 @@ def gen_extra(self): pyro.clear_param_store() def maker(v, a, **kwargs): - return Integrator(ODE(v, a), torch.zeros(nsamples, 2), **kwargs) + return Integrator(ODE(v, a), torch.zeros(nsamples, 2), + block_size = time_block, **kwargs) # Setup the model model = Model( @@ -294,10 +294,7 @@ def maker(v, a, **kwargs): guide(times) optimizer = optim.ClippedAdam({"lr": lr}) - if jit_mode: - l = JitTrace_ELBO(num_particles=num_samples) - else: - l = Trace_ELBO(num_particles=num_samples) + l = Trace_ELBO(num_particles=num_samples) svi = SVI(model, guide, optimizer, loss=l) diff --git a/examples/ode/vanderpohl.py b/examples/ode/vanderpohl.py index 899d9a9..f42fe24 100755 --- a/examples/ode/vanderpohl.py +++ b/examples/ode/vanderpohl.py @@ -58,7 +58,7 @@ def forward(self, t, y): f[..., 0] = y[..., 1] f[..., 1] = -y[..., 0] + self.mu * (1.0 - y[..., 0] ** 2.0) * y[..., 1] - df = torch.empty(y.shape + y.shape[1:]) + df = torch.empty(y.shape + y.shape[-1:]) df[..., 0, 0] = 0 df[..., 0, 1] = 1 df[..., 1, 0] = -1 - 2.0 * self.mu * y[..., 0] * y[..., 1] @@ -68,19 +68,24 @@ def forward(self, t, y): if __name__ == "__main__": - # Common + # Common inputs y0 = torch.tensor([[-1.0, 1]]) period = lambda m: (3.0 - 2 * np.log(2)) * m + 2.0 * np.pi / mu ** (1.0 / 3) + # Number of vectorized time steps + time_chunk = 10 + # Test for non-stiff version mu = 1.0 - times = torch.linspace(0, period(mu) * 10, 1000).reshape(1000, 1) + times = torch.linspace(0, period(mu) * 10, 10000).unsqueeze(-1) model = VanderPolODE(mu) - res_exp = ode.odeint(model, y0, times, method="forward-euler", substep=10) + res_exp = ode.odeint(model, y0, times, method="forward-euler", + block_size = time_chunk, guess_type = "previous") res_imp = ode.odeint( - model, y0, times, method="backward-euler", substep=10, solver_method="lu" + model, y0, times, method="backward-euler", + block_size = time_chunk, guess_type = "previous" ) plt.figure() @@ -94,13 +99,15 @@ def forward(self, t, y): # Test for moderately stiff version mu = 14.0 - times = torch.linspace(0, period(mu) * 10, 1000).reshape(1000, 1) + times = torch.linspace(0, period(mu) * 10, 10000).unsqueeze(-1) model = VanderPolODE(mu) - res_exp = ode.odeint(model, y0, times, method="forward-euler", substep=10) + res_exp = ode.odeint(model, y0, times, method="forward-euler", + block_size = time_chunk, guess_type = "previous") res_imp = ode.odeint( - model, y0, times, method="backward-euler", substep=10, solver_method="lu" + model, y0, times, 
method="backward-euler", + block_size = time_chunk, guess_type = "previous" ) plt.figure() @@ -114,12 +121,13 @@ def forward(self, t, y): # Test for highly stiff version (explicit just explodes) mu = 30.0 - times = torch.linspace(0, period(mu) * 10 * 10.0 / 16, 1000).reshape(1000, 1) + times = torch.linspace(0, period(mu) * 10 * 10.0 / 16, 20000).unsqueeze(-1) model = VanderPolODE(mu) res_imp = ode.odeint( - model, y0, times, method="backward-euler", substep=20, solver_method="lu" + model, y0, times, method="backward-euler", + block_size = time_chunk, guess_type = "previous" ) plt.figure() diff --git a/examples/performance/linear_solvers.py b/examples/performance/linear_solvers.py deleted file mode 100755 index e14e850..0000000 --- a/examples/performance/linear_solvers.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python - -import sys - -sys.path.append("../..") - -import time - -from pyoptmat import solvers - -import torch - -import time - -torch.set_default_tensor_type(torch.DoubleTensor) - -if torch.cuda.is_available(): - dev = "cuda:0" -else: - dev = "cpu" -device = torch.device(dev) - -if __name__ == "__main__": - nbatch = 10000 - nsize = 20 - - A = torch.rand((nbatch,nsize,nsize), device = device) - b = torch.rand((nbatch,nsize), device = device) - - t1 = time.time() - for i in range(10): - x = solvers.lu_gmres(A, b, check = min(nsize+1,20), maxiter = nsize+1) - time_gmres = time.time() - t1 - - t1 = time.time() - for i in range(10): - x = solvers.lu_linear_solve(A, b) - time_lu = time.time() - t1 - - print("GMRES: %f" % time_gmres) - print("LU: %f" % time_lu) - diff --git a/examples/structural-inference/creep-fatigue/deterministic/optimize.py b/examples/structural-inference/creep-fatigue/deterministic/optimize.py index 9e1876a..3512903 100755 --- a/examples/structural-inference/creep-fatigue/deterministic/optimize.py +++ b/examples/structural-inference/creep-fatigue/deterministic/optimize.py @@ -51,6 +51,9 @@ def make(n, eta, s0, R, d, C, g, **kwargs): if __name__ == "__main__": + # Number of vectorized time steps + time_chunk_size = 10 + # 1) Load the data for the variance of interest, # cut down to some number of samples, and flatten scale = 0.15 @@ -73,7 +76,7 @@ def make(n, eta, s0, R, d, C, g, **kwargs): print("") # 3) Create the actual model - model = optimize.DeterministicModel(make, names, ics) + model = optimize.DeterministicModel(lambda *args, **kwargs: make(*args, block_size = time_chunk_size, **kwargs), names, ics) # 4) Setup the optimizer niter = 200 diff --git a/examples/structural-inference/creep-fatigue/statistical/infer.py b/examples/structural-inference/creep-fatigue/statistical/infer.py index 0183cd1..2359ce7 100755 --- a/examples/structural-inference/creep-fatigue/statistical/infer.py +++ b/examples/structural-inference/creep-fatigue/statistical/infer.py @@ -52,6 +52,9 @@ def make(n, eta, s0, R, d, C, g, **kwargs): if __name__ == "__main__": + # Number of vectorized time steps + time_chunk_size = 10 + # 1) Load the data for the variance of interest, # cut down to some number of samples, and flatten scale = 0.05 @@ -84,7 +87,7 @@ def make(n, eta, s0, R, d, C, g, **kwargs): # 3) Create the actual model model = optimize.HierarchicalStatisticalModel( - make, + lambda *args, **kwargs: make(*args, block_size = time_chunk_size, **kwargs), names, loc_loc_priors, loc_scale_priors, diff --git a/examples/structural-inference/tension-rate-switch/.gitattributes b/examples/structural-inference/tension-rate-switch/.gitattributes deleted file mode 100644 index 
bfded1c..0000000 --- a/examples/structural-inference/tension-rate-switch/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -*.dill filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text diff --git a/examples/structural-inference/tension-rate-switch/deterministic/optimize.py b/examples/structural-inference/tension-rate-switch/deterministic/optimize.py deleted file mode 100755 index ef8e658..0000000 --- a/examples/structural-inference/tension-rate-switch/deterministic/optimize.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env python3 - -""" - Example using the tutorial data to train a deterministic model, rather than - a statistical model. -""" - -import sys - -sys.path.append("../../../..") -sys.path.append("..") - -import os.path - -import numpy.random as ra - -import xarray as xr -import torch - -from maker import make_model, downsample, params_true - -from pyoptmat import optimize, experiments, scaling -from tqdm import tqdm - -import matplotlib.pyplot as plt - -# Don't care if integration fails -import warnings - -warnings.filterwarnings("ignore") - -# Use doubles -torch.set_default_tensor_type(torch.DoubleTensor) - -# Select device to run on -if torch.cuda.is_available(): - dev = "cuda:0" -else: - dev = "cpu" -dev = "cpu" -device = torch.device(dev) - -# Maker function returns the ODE model given the parameters -# Don't try to optimize for the Young's modulus -def make(g0, A, C, R, d, **kwargs): - """ - Maker with the Young's modulus fixed - """ - return make_model(g0, A, C, R, d, device=device, **kwargs).to( - device - ) - - -if __name__ == "__main__": - # 1) Load the data for the variance of interest, - # cut down to some number of samples, and flatten - scale = 0.01 - nsamples = 10 # at each strain rate - input_data = xr.open_dataset(os.path.join("..", "scale-%3.2f.nc" % scale)) - data, results, cycles, types, control = downsample( - experiments.load_results(input_data, device=device), - nsamples, - input_data.nrates, - input_data.nsamples, - ) - - # 2) Setup names for each parameter and the initial conditions - names = ["g0", "A", "C", "R", "d"] - rng = 0.1 - ics = torch.tensor([ra.uniform((1-rng)*p,(1+rng)*p) for p in params_true]) - - # 3) Calculate the initial gradient values - model = optimize.DeterministicModel(lambda *args: make(*args), names, ics) - loss = torch.nn.MSELoss(reduction="sum") - - lossv = loss(model(data, cycles, types, control), results) - lossv.backward() - print("Initial parameter gradients:") - grads = [] - for n in names: - grads.append(getattr(model,n).grad.detach()) - print("%s:\t%.3e" % (n, grads[-1])) - print("") - - # 4) Set up some scaling functions - scale_functions = [ - scaling.BoundedScalingFunction( - torch.tensor(pv*(1-rng), device = device), - torch.tensor(pv*(1+rng))) for - pv in ics] - ics = torch.tensor([sf.unscale(i) for i, sf in zip(ics, scale_functions)]) - - print("Initial parameter values:") - for n, ic, sf in zip(names, ics, scale_functions): - print("%s:\t%3.2e -> %3.2f" % (n, ic, sf.scale(ic))) - print("") - - # 5) Create the actual model - model = optimize.DeterministicModel(lambda *args: make(*args, - scale_functions = scale_functions), names, ics) - - lossv = loss(model(data, cycles, types, control), results) - lossv.backward() - print("Scaled parameter gradients:") - grads = [] - for n in names: - grads.append(getattr(model,n).grad.detach()) - print("%s:\t%.3e" % (n, grads[-1])) - print("") - - # 6) Setup the optimizer - niter = 200 - lr = 3.0e-3 - max_norm = 1.0e2 - optim = 
torch.optim.Adam(model.parameters(), lr = lr) - #optim = torch.optim.LBFGS(model.parameters()) - - - # 7) Actually do the optimization! - def closure(): - optim.zero_grad() - pred = model(data, cycles, types, control) - lossv = loss(pred, results) - lossv.backward() - torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm) - return lossv - - t = tqdm(range(niter), total=niter, desc="Loss: ") - loss_history = [] - for i in t: - closs = optim.step(closure) - loss_history.append(closs.detach().cpu().numpy()) - t.set_description("Loss: %3.2e" % loss_history[-1]) - - # 8) Check accuracy of the optimized parameters - print("") - print("Optimized parameter accuracy:") - for n, sf, tp in zip(names, scale_functions, params_true): - print("%s:\t%3.2f/%3.2f" % (n, sf.scale(getattr(model, n).data), tp)) - - # 9) Plot the convergence history - plt.figure() - plt.plot(loss_history) - plt.xlabel("Iteration") - plt.ylabel("Loss") - plt.tight_layout() - plt.show() diff --git a/examples/structural-inference/tension-rate-switch/maker.py b/examples/structural-inference/tension-rate-switch/maker.py deleted file mode 100755 index 8536f18..0000000 --- a/examples/structural-inference/tension-rate-switch/maker.py +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/env python - -""" - Helper functions for the structural material model inference with - tension tests examples. -""" - -import sys - -sys.path.append("../../..") - -import numpy as np -import numpy.random as ra - -import xarray as xr - -import torch -from pyoptmat import models, flowrules, hardening, optimize, temperature -from pyoptmat.temperature import ConstantParameter as CP - -from tqdm import tqdm - -import tqdm - -import warnings - -warnings.filterwarnings("ignore") - -# Use doubles -torch.set_default_tensor_type(torch.DoubleTensor) - -# Actual parameters -A_true = -3.4 -C_true = -5.9 -g0_true = 0.60 -R_true = 200.0 -d_true = 5.0 -params_true = [g0_true, A_true, C_true, R_true, d_true] - -# Various fixed parameters for the KM model -eps0 = 1e6 -k = 1.380649e-20 -b = 2.02e-7 -eps0_ri = 1e-10 -lmbda = 0.99 -steep = 100.0 -n_constant = 2.0 -eta_constant = 100.0 - -# Constant temperature to run simulations at -T = 550.0 + 273.15 - -# Select device to run on -if torch.cuda.is_available(): - dev = "cuda:0" -else: - dev = "cpu" -dev = "cpu" -device = torch.device(dev) - - -def make_model(g0, A, C, R, d, scale_functions = None, device=torch.device("cpu"), **kwargs): - """ - Key function for the entire problem: given parameters generate the model - """ - E = temperature.PolynomialScaling( - [-6.82798735e-05, 9.48207244e-02, -1.11026526e02, 2.21183687e05] - ) - mu = temperature.PolynomialScaling( - [-2.60610204e-05, 3.61911162e-02, -4.23765368e01, 8.44212545e04] - ) - - if scale_functions is None: - g0_scale = lambda x: x - A_scale = lambda x: x - C_scale = lambda x: x - R_scale = lambda x: x - d_scale = lambda x: x - else: - g0_scale = scale_functions[0] - A_scale = scale_functions[1] - C_scale = scale_functions[2] - R_scale = scale_functions[3] - d_scale = scale_functions[4] - - isotropic = hardening.VoceIsotropicHardeningModel( - CP(R, scaling=R_scale), CP(d, scaling=d_scale)) - - kinematic = hardening.NoKinematicHardeningModel() - - n = temperature.KMRateSensitivityScaling(A, mu, - torch.tensor(b, device = device), torch.tensor(k, device = device), - A_scale = A_scale) - eta = temperature.KMViscosityScalingGC(A, - C, g0, - mu, torch.tensor(eps0, device = device), - torch.tensor(b, device = device), - torch.tensor(k, device = device), - A_scale = A_scale, 
C_scale = C_scale, g0_scale = g0_scale) - s0 = temperature.ShearModulusScalingExp(C, mu, A_scale = C_scale) - - rd_flowrule = flowrules.IsoKinViscoplasticity(n, eta, - CP(torch.tensor(0.0, device = device)), - isotropic, - kinematic, - ) - - ri_flowrule_base = flowrules.IsoKinViscoplasticity( - CP(torch.tensor(n_constant, device = device)), - CP(torch.tensor(eta_constant, device = device)), - s0, isotropic, kinematic) - ri_flowrule = flowrules.RateIndependentFlowRuleWrapper( - ri_flowrule_base, lmbda, eps0_ri) - flowrule = flowrules.SoftKocksMeckingRegimeFlowRule( - ri_flowrule, rd_flowrule, - torch.tensor(g0, device = device), - mu, - torch.tensor(b, device = device), - torch.tensor(eps0, device = device), - torch.tensor(k, device = device), - torch.tensor(steep, device = device), - g0_scale = g0_scale) - - model = models.InelasticModel(E, flowrule) - - return models.ModelIntegrator(model, **kwargs) - - -def generate_input(erates, emax, ntime): - """ - Generate the times and strains given the strain rates, maximum strain, and number of time steps - """ - strain = torch.repeat_interleave( - torch.linspace(0, emax, ntime, device=device)[None, :], len(erates), 0 - ).T.to(device) - time = strain / erates - - return time, strain - - -def downsample(rawdata, nkeep, nrates, nsamples): - """ - Return fewer than the whole number of samples for each strain rate - """ - ntime = rawdata[0].shape[1] - return tuple( - data.reshape(data.shape[:-1] + (nrates, nsamples))[..., :nkeep].reshape( - data.shape[:-1] + (-1,) - ) - for data in rawdata - ) - - -if __name__ == "__main__": - # Running this script will regenerate the data - ntime = 200 - emax = 0.5 - erates = torch.logspace(-2, -8, 7, device=device) - nrates = len(erates) - nsamples = 50 - - scales = [0.0, 0.01, 0.05, 0.1, 0.15] - - times, strains = generate_input(erates, emax, ntime) - - for scale in scales: - print("Generating data for scale = %3.2f" % scale) - full_times = torch.empty((ntime, nrates, nsamples), device=device) - full_strains = torch.empty_like(full_times) - full_stresses = torch.empty_like(full_times) - full_temperatures = torch.full_like(full_strains, T) - - for i in tqdm.tqdm(range(nsamples)): - full_times[:, :, i] = times - full_strains[:, :, i] = strains - - # True values are 0.5 with our scaling so this is easy - model = make_model( - torch.tensor(ra.normal(0.5, scale), device=device), - torch.tensor(ra.normal(0.5, scale), device=device), - torch.tensor(ra.normal(0.5, scale), device=device), - torch.tensor(ra.normal(0.5, scale), device=device), - torch.tensor(ra.normal(0.5, scale), device=device), - ) - - with torch.no_grad(): - full_stresses[:, :, i] = model.solve_strain( - times, strains, full_temperatures[:, :, i] - )[:, :, 0] - - full_cycles = torch.zeros_like(full_times, dtype=int, device=device) - types = np.array(["tensile"] * (nsamples * len(erates))) - controls = np.array(["strain"] * (nsamples * len(erates))) - - ds = xr.Dataset( - { - "time": (["ntime", "nexp"], full_times.flatten(-2, -1).cpu().numpy()), - "strain": ( - ["ntime", "nexp"], - full_strains.flatten(-2, -1).cpu().numpy(), - ), - "stress": ( - ["ntime", "nexp"], - full_stresses.flatten(-2, -1).cpu().numpy(), - ), - "temperature": ( - ["ntime", "nexp"], - full_temperatures.cpu().flatten(-2, -1).numpy(), - ), - "cycle": (["ntime", "nexp"], full_cycles.flatten(-2, -1).cpu().numpy()), - "type": (["nexp"], types), - "control": (["nexp"], controls), - }, - attrs={"scale": scale, "nrates": nrates, "nsamples": nsamples}, - ) - - ds.to_netcdf("scale-%3.2f.nc" % 
scale) diff --git a/examples/structural-inference/tension-rate-switch/scale-0.00.nc b/examples/structural-inference/tension-rate-switch/scale-0.00.nc deleted file mode 100644 index 217ba94..0000000 --- a/examples/structural-inference/tension-rate-switch/scale-0.00.nc +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:077dcaf588e51857867e59d85a8e1f1af23d8bdd2ffd37e1981f9dbabe0b4845 -size 2848678 diff --git a/examples/structural-inference/tension-rate-switch/scale-0.01.nc b/examples/structural-inference/tension-rate-switch/scale-0.01.nc deleted file mode 100644 index 4bd4795..0000000 --- a/examples/structural-inference/tension-rate-switch/scale-0.01.nc +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:286ec4287fd5748269aed67ee9ef0285289d03b87aba3457f222a60bf971596f -size 2848678 diff --git a/examples/structural-inference/tension-rate-switch/scale-0.05.nc b/examples/structural-inference/tension-rate-switch/scale-0.05.nc deleted file mode 100644 index 5dff3a3..0000000 --- a/examples/structural-inference/tension-rate-switch/scale-0.05.nc +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:77ae14efdca732b9803c5f39bc461b35f673cf51e8c3e22d6082e8f2dff5fb63 -size 2848678 diff --git a/examples/structural-inference/tension-rate-switch/scale-0.10.nc b/examples/structural-inference/tension-rate-switch/scale-0.10.nc deleted file mode 100644 index abad3f3..0000000 --- a/examples/structural-inference/tension-rate-switch/scale-0.10.nc +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d62771deb440473db8124a98571e2e1375e67edd9606b2a5dd7a05f2b9417060 -size 2848678 diff --git a/examples/structural-inference/tension-rate-switch/scale-0.15.nc b/examples/structural-inference/tension-rate-switch/scale-0.15.nc deleted file mode 100644 index f923e40..0000000 --- a/examples/structural-inference/tension-rate-switch/scale-0.15.nc +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:97f23a918112da108d1ccbfde6559aef032073d6309b360120a0ec285ddebd2e -size 2848678 diff --git a/examples/structural-inference/tension-rate-switch/statistical/demo_plot.py b/examples/structural-inference/tension-rate-switch/statistical/demo_plot.py deleted file mode 100755 index 6214047..0000000 --- a/examples/structural-inference/tension-rate-switch/statistical/demo_plot.py +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import os.path - -sys.path.append("../../../..") -sys.path.append("..") - -import xarray as xr -import torch - -from maker import make_model, downsample - -from pyoptmat import optimize, experiments -from tqdm import trange - -import matplotlib.pyplot as plt -from matplotlib.lines import Line2D -from matplotlib.patches import Patch - -import warnings - -warnings.filterwarnings("ignore") - -# Use doubles -torch.set_default_tensor_type(torch.DoubleTensor) - -# Run on this on the cpu -dev = "cpu" -device = torch.device(dev) - -# Don't try to optimize for the Young's modulus -def make(n, eta, s0, R, d, **kwargs): - """ - Maker with the Young's modulus fixed - """ - return make_model(torch.tensor(0.5), n, eta, s0, R, d, device=device, **kwargs).to( - device - ) - - -if __name__ == "__main__": - # 1) Load the data for the variance of interest, - # cut down to some number of samples, and flatten - scale = 0.05 - nsamples = 10 # at each strain rate - input_data = xr.open_dataset(os.path.join("..", "scale-%3.2f.nc" % scale)) - data, results, 
cycles, types, control = downsample( - experiments.load_results(input_data, device=device), - nsamples, - input_data.nrates, - input_data.nsamples, - ) - - names = ["n", "eta", "s0", "R", "d"] - sampler = optimize.StatisticalModel( - make, - names, - [0.50, 0.49, 0.49, 0.48, 0.48], - [0.02, 0.02, 0.03, 0.05, 0.05], - torch.tensor(1.0e-4), - ) - - plt.figure() - plt.plot(data[2, :, :nsamples].cpu(), results[:, :nsamples].cpu(), "k--") - - nsamples = 100 - alpha = 0.05 / 2 - - times, strains, temps, cycles = experiments.make_tension_tests( - torch.tensor([1.0e-2]), torch.tensor([0]), torch.tensor([0.5]), 200 - ) - data = torch.stack((times, temps, strains)) - control = torch.zeros(1, dtype=int) - types = torch.zeros(1, dtype=int) - - stress_results = torch.zeros(nsamples, data.shape[1]) - - for i in trange(nsamples): - stress_results[i, :] = sampler(data, cycles, types, control)[:, 0] - - mean_result = torch.mean(stress_results, 0) - sresults, _ = torch.sort(stress_results, 0) - min_result = sresults[int(alpha * nsamples), :] - max_result = sresults[int((1 - alpha) * nsamples), :] - - (l,) = plt.plot(data[2, :, 0], mean_result, lw=4, color="k") - p = plt.fill_between(data[2, :, 0], min_result, max_result, alpha=0.5, color="k") - - plt.legend( - [ - Line2D([0], [0], color="k", ls="--"), - Line2D([0], [0], color="k", lw=4), - Patch(facecolor="k", edgecolor=None, alpha=0.5), - ], - ["Experimental data", "Model average", "Model 95% prediction interval"], - loc="best", - ) - - plt.xlabel("Strain (mm/mm)") - plt.ylabel("Stress (MPa)") - plt.show() diff --git a/examples/structural-inference/tension-rate-switch/statistical/infer.py b/examples/structural-inference/tension-rate-switch/statistical/infer.py deleted file mode 100755 index 092ea78..0000000 --- a/examples/structural-inference/tension-rate-switch/statistical/infer.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python3 - -""" - Tutorial example of training a statistical model to tension test data - from from a known distribution. -""" - -import sys -import os.path - -sys.path.append("../../../..") -sys.path.append("..") - -import numpy.random as ra - -import xarray as xr -import torch - -from maker import make_model, downsample - -from pyoptmat import optimize, experiments -from tqdm import tqdm - -import pyro -from pyro.infer import SVI -import pyro.optim as optim - -import matplotlib.pyplot as plt - -import warnings - -warnings.filterwarnings("ignore") - -# Use doubles -torch.set_default_tensor_type(torch.DoubleTensor) - -# Run on GPU! 
-if torch.cuda.is_available(): - dev = "cuda:0" -else: - dev = "cpu" -device = torch.device(dev) - -# Don't try to optimize for the Young's modulus -def make(n, eta, s0, R, d, **kwargs): - """ - Maker with Young's modulus fixed - """ - return make_model(torch.tensor(0.5), n, eta, s0, R, d, device=device, **kwargs).to( - device - ) - - -if __name__ == "__main__": - # 1) Load the data for the variance of interest, - # cut down to some number of samples, and flatten - scale = 0.05 - nsamples = 10 # at each strain rate - input_data = xr.open_dataset(os.path.join("..", "scale-%3.2f.nc" % scale)) - data, results, cycles, types, control = downsample( - experiments.load_results(input_data, device=device), - nsamples, - input_data.nrates, - input_data.nsamples, - ) - - # 2) Setup names for each parameter and the priors - names = ["n", "eta", "s0", "R", "d"] - loc_loc_priors = [ - torch.tensor(ra.random(), device=device) for i in range(len(names)) - ] - loc_scale_priors = [torch.tensor(0.15, device=device) for i in range(len(names))] - scale_scale_priors = [torch.tensor(0.15, device=device) for i in range(len(names))] - - eps = torch.tensor(1.0e-4, device=device) - - print("Initial parameter values:") - print("\tloc loc\t\tloc scale\tscale scale") - for n, llp, lsp, sp in zip( - names, loc_loc_priors, loc_scale_priors, scale_scale_priors - ): - print("%s:\t%3.2f\t\t%3.2f\t\t%3.2f" % (n, llp, lsp, sp)) - print("") - - # 3) Create the actual model - model = optimize.HierarchicalStatisticalModel( - make, names, loc_loc_priors, loc_scale_priors, scale_scale_priors, eps - ).to(device) - - # 4) Get the guide - guide = model.make_guide() - - # 5) Setup the optimizer and loss - lr = 1.0e-2 - g = 1.0 - niter = 200 - lrd = g ** (1.0 / niter) - num_samples = 1 - - optimizer = optim.ClippedAdam({"lr": lr, "lrd": lrd}) - - ls = pyro.infer.Trace_ELBO(num_particles=num_samples) - - svi = SVI(model, guide, optimizer, loss=ls) - - # 6) Infer! - t = tqdm(range(niter), total=niter, desc="Loss: ") - loss_hist = [] - for i in t: - loss = svi.step(data, cycles, types, control, results) - loss_hist.append(loss) - t.set_description("Loss %3.2e" % loss) - - # 7) Print out results - print("") - print("Inferred distributions:") - print("\tloc\t\tscale") - for n in names: - s = pyro.param(n + model.scale_suffix + model.param_suffix).data - m = pyro.param(n + model.loc_suffix + model.param_suffix).data - print("%s:\t%3.2f/0.50\t%3.2f/%3.2f" % (n, m, s, scale)) - print("") - - # 8) Plot convergence - plt.figure() - plt.loglog(loss_hist) - plt.xlabel("Iteration") - plt.ylabel("Loss") - plt.tight_layout() - plt.show() diff --git a/examples/structural-inference/tension-rate-switch/visualize_scatter_km.py b/examples/structural-inference/tension-rate-switch/visualize_scatter_km.py deleted file mode 100755 index 0aa36fa..0000000 --- a/examples/structural-inference/tension-rate-switch/visualize_scatter_km.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python3 - -""" - Simple helper to make a plot illustrating the variation in the - synthetic experimental data. 
-""" - -import sys - -sys.path.append("../../..") - -import xarray as xr -import torch -import matplotlib.pyplot as plt -import seaborn as sns -from pyoptmat import temperature, experiments - -from maker import make_model, downsample - -# Various fixed parameters for the KM model -eps0 = 1e6 -k = 1.380649e-20 -b = 2.02e-7 -eps0_ri = 1e-10 - -# Constant temperature to run simulations at -T = 550.0 + 273.15 - -# Use doubles -torch.set_default_tensor_type(torch.DoubleTensor) - -def find_yield(strain, stress, offset = 0.2/100): - E = (stress[1] - stress[0]) / (strain[1] - strain[0]) - S = E * (strain - offset) - i = torch.argmax(stress-S, dim = 0, keepdim = True) + 1 - return torch.gather(stress, 0, i).squeeze(0) - -if __name__ == "__main__": - scale = 0.01 - - data = xr.load_dataset("scale-%3.2f.nc" % scale) - - strain = torch.tensor(data.strain.data.reshape(-1, data.nrates, data.nsamples)) - stress = torch.tensor(data.stress.data.reshape(-1, data.nrates, data.nsamples)) - erates = torch.logspace(-2, -8, 7) - erates = torch.repeat_interleave(erates.unsqueeze(-1), 50, dim = 1) - - mu = temperature.PolynomialScaling( - [-2.60610204e-05, 3.61911162e-02, -4.23765368e01, 8.44212545e04] - ) - muv = mu(torch.tensor(T)) - - g = k* T / (muv * b**3.0) * torch.log(eps0 / erates) - Sy = find_yield(strain, stress) - muSy = Sy / muv - - model = make_model(torch.tensor(0.59), torch.tensor(-3.08), torch.tensor(-5.88), - torch.tensor(199.88), torch.tensor(4.70)) - - scale = 0.01 - nsamples = 50 # at each strain rate - inp_data, results, cycles, types, control = downsample( - experiments.load_results(data), - nsamples, - data.nrates, - data.nsamples, - ) - - with torch.no_grad(): - results = model.solve_strain(inp_data[0], inp_data[2], inp_data[1]) - stress = results[:,:,0] - Sy_res = find_yield(inp_data[2], stress) - - Sy_res = Sy_res.reshape(data.nrates,data.nsamples) - - gs = torch.argsort(g.flatten()) - g1 = g.flatten()[gs] - y1 = muSy.flatten()[gs] - - sns.lineplot(x = g1, y = y1, errorbar = ('ci', 95)) - sns.scatterplot(x = g.flatten(), y = muSy.flatten()) - - sns.lineplot(x = g[:,0], y = Sy_res[:,0] / muv) - plt.yscale('log') - plt.xlabel(r"$\frac{kT}{\mu b^3} \log{\frac{\dot{\varepsilon}_0}{\dot{\varepsilon}}}$") - plt.ylabel(r"$\frac{\sigma_y}{\mu}$") - plt.legend(loc='best', labels = ['Regression, mean', 'Regression, 95%', 'Data', 'Model']) - plt.tight_layout() - plt.savefig("km-nice.png", dpi = 300) - diff --git a/examples/structural-inference/tension-rate-switch/visualize_variance.py b/examples/structural-inference/tension-rate-switch/visualize_variance.py deleted file mode 100755 index 8e17cef..0000000 --- a/examples/structural-inference/tension-rate-switch/visualize_variance.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 - -""" - Simple helper to make a plot illustrating the variation in the - synthetic experimental data. 
-""" - -import sys - -sys.path.append("../../..") - -import xarray as xr -import torch -import matplotlib.pyplot as plt - - -# Use doubles -torch.set_default_tensor_type(torch.DoubleTensor) - -if __name__ == "__main__": - scales = [0.0, 0.01, 0.05, 0.1, 0.15] - - for scale in scales: - data = xr.load_dataset("scale-%3.2f.nc" % scale) - - strain = data.strain.data.reshape(-1, data.nrates, data.nsamples) - stress = data.stress.data.reshape(-1, data.nrates, data.nsamples) - - for i in range(data.nrates): - plt.plot(strain[:, i], stress[:, i]) - plt.xlabel("Strain (mm/mm)") - plt.ylabel("Stress (MPa)") - plt.title("Scale = %3.2f" % scale) - plt.tight_layout() - plt.show() diff --git a/examples/structural-inference/tension-temperature/data.nc b/examples/structural-inference/tension-temperature/data.nc new file mode 100644 index 0000000..a61ec0a --- /dev/null +++ b/examples/structural-inference/tension-temperature/data.nc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7039ccaa735ec74296752b82ef047c7dc80cc2873b75a444cd036796d76460cf +size 3126822 diff --git a/examples/structural-inference/tension-temperature/generate_data.py b/examples/structural-inference/tension-temperature/generate_data.py new file mode 100755 index 0000000..1e83819 --- /dev/null +++ b/examples/structural-inference/tension-temperature/generate_data.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 + +""" + Model generators and code to generate synthetic data +""" + +import sys +sys.path.append('../../..') + +import numpy as np +import torch +import xarray as xr + +import tqdm + +from pyoptmat import models, flowrules, hardening, temperature, experiments, optimize + +# Use doubles +torch.set_default_tensor_type(torch.DoubleTensor) + +# Select device to run on +if torch.cuda.is_available(): + dev = "cuda:0" +else: + dev = "cpu" +device = torch.device(dev) + +# True model parameters +# Elastic constants +E_poly = torch.tensor([-5.76107011e-05, 7.52478382e-02, -9.98116448e+01, 2.19193150e+05], device = device) +mu_poly = torch.tensor([-2.19888172e-05, 2.87205489e-02, -3.80960476e+01, 8.36615078e+04], device = device) + +# Rate sensitivity +A = -3.35 +B = -3.23 + +# Hardening (initial slope) +H = 0.1 + +# Hardening (saturation) +tau0 = 1000.0 +Q = 800.0 + +# Various physical/reference constants +eps0 = torch.tensor(1.0e6, device = device) +b = torch.tensor(2.019e-7, device = device) +k = torch.tensor(1.38064e-20, device = device) + +# Setup constants +E = temperature.PolynomialScaling(E_poly) +mu = temperature.PolynomialScaling(mu_poly) + +def true_model(A, B, H, tau0, Q, device = torch.device("cpu"), **kwargs): + """True analytic model + + Args: + A (float): Kocks-Mecking slope + B (float): Kocks-Mecking interscept + H (float): fraction of shear modulus for initial hardening slope + tau0 (float): athermal hardening strength + Q (float): hardening activation energy + """ + n = temperature.KMRateSensitivityScaling(A, mu, b, k) + eta = temperature.KMViscosityScaling(A, B, mu, eps0, b, k) + theta = temperature.ShearModulusScaling(H, mu) + tau = temperature.InverseArrheniusScaling(tau0, Q) + + isotropic = hardening.Theta0VoceIsotropicHardeningModel( + tau, + theta + ) + + kinematic = hardening.NoKinematicHardeningModel() + flowrule = flowrules.IsoKinViscoplasticity( + n, + eta, + temperature.ConstantParameter(torch.tensor(0.0, device = device)), + isotropic, + kinematic + ) + model = models.InelasticModel( + E, + flowrule + ) + + return models.ModelIntegrator(model, **kwargs).to(device) + +if __name__ == "__main__": 
+ # Amount of variability + sf = 0.075 + noise = 2.0 + elimit = 0.1 + + # Number of repeat samples + nsamples = 4 + + # Integration chunk size + time_chunk_size = 25 + + # Inputs defining the loading conditions + nrate = 5 + ntemp = 50 + strain_rates = np.logspace(-5, -2, nrate) + temperatures = np.linspace(500+273.15,800+273.15, ntemp) + + # Number of steps + nsteps = 75 + + # Generate the list of rates and temperatures + SR, T = np.meshgrid(strain_rates, temperatures) + SR = SR.flatten() + T = T.flatten() + elimits = np.ones_like(T) * elimit + + # Make the actual input data + times, strains, temps, cycles = experiments.make_tension_tests(SR, T, elimits, nsteps) + data = torch.stack((times, temps,strains)) + control = torch.zeros((len(SR),), dtype = int) + types = torch.zeros_like(control) + + # Setup the parameters + locs = torch.tensor([A, B, H, tau0, Q], device = device) + scales = sf * torch.abs(locs) + noise = torch.tensor(noise, device = device) + + # Setup the statistical model + names = ["A", "B", "H", "tau0", "Q"] + model = optimize.StatisticalModel( + lambda *args, **kwargs: true_model(*args, block_size = time_chunk_size, **kwargs), + names, + locs.to(device), scales.to(device), noise.to(device)) + + # Data storage... + full_stress = torch.zeros_like(strains.unsqueeze(0).repeat_interleave(nsamples, dim = 0)) + + # Run forward samples + for i in tqdm.trange(nsamples): + with torch.no_grad(): + full_stress[i,...] = model(data.to(device), cycles.to(device), types.to(device), control.to(device)) + + # Expand the input data + full_times = times.unsqueeze(0).repeat_interleave(nsamples, dim = 0) + full_strains = strains.unsqueeze(0).repeat_interleave(nsamples, dim = 0) + full_temps = temps.unsqueeze(0).repeat_interleave(nsamples, dim = 0) + full_cycles = cycles.unsqueeze(0).repeat_interleave(nsamples, dim = 0) + full_control = control.unsqueeze(0).repeat_interleave(nsamples, dim = 0) + full_types = types.unsqueeze(0).repeat_interleave(nsamples, dim = 0) + + string_control = np.array(["strain"] * full_control.flatten().shape[0]) + string_types = np.array(["tensile"] * full_types.flatten().shape[0]) + + ds = xr.Dataset( + { + "time": (["ntime", "nexp"], full_times.transpose(0,1).flatten(-2,-1).cpu().numpy()), + "strain": ( + ["ntime", "nexp"], + full_strains.transpose(0,1).flatten(-2,-1).cpu().numpy(), + ), + "stress": ( + ["ntime", "nexp"], + full_stress.transpose(0,1).flatten(-2,-1).cpu().numpy(), + ), + "temperature": ( + ["ntime", "nexp"], + full_temps.transpose(0,1).flatten(-2,-1).cpu().numpy(), + ), + "cycle": (["ntime", "nexp"], full_cycles.transpose(0,1).flatten(-2,-1).cpu().numpy()), + "type": (["nexp"], string_types), + "control": (["nexp"], string_control), + }, + attrs={"nrates": nrate, "nsamples": nsamples}, + ) + + ds.to_netcdf("data.nc") diff --git a/examples/structural-inference/tension-temperature/infer.py b/examples/structural-inference/tension-temperature/infer.py new file mode 100755 index 0000000..3984e39 --- /dev/null +++ b/examples/structural-inference/tension-temperature/infer.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 + +import sys +sys.path.append('../../..') + +import xarray as xr +import torch +import pyro +import matplotlib.pyplot as plt + +import tqdm + +from pyoptmat import optimize, experiments, models, flowrules, hardening, temperature, scaling + +# Use doubles +torch.set_default_tensor_type(torch.DoubleTensor) + +# Select device to run on +if torch.cuda.is_available(): + dev = "cuda:0" +else: + dev = "cpu" +device = torch.device(dev) + +# Elastic 
constants, don't infer +E_poly = torch.tensor([-5.76107011e-05, 7.52478382e-02, -9.98116448e+01, 2.19193150e+05], device = device) +E = temperature.PolynomialScaling(E_poly) + +if __name__ == "__main__": + # Time chunking + time_chunk_size = 5 + + # Load in the data + input_data = xr.open_dataset("data.nc") + data, results, cycles, types, control = experiments.load_results( + input_data) + + # Figure out our temperature control points + ncontrol = 5 + Tcontrol = torch.linspace(torch.min(data[1]), torch.max(data[1]), + ncontrol, device = device) + + # Model making function + def make(n_vals, eta_vals, theta_vals, tau_vals, scale_functions = [lambda x: x] * 4, **kwargs): + n = temperature.PiecewiseScaling(Tcontrol, n_vals, values_scale_fn = scale_functions[0]) + eta = temperature.PiecewiseScaling(Tcontrol, eta_vals, values_scale_fn = scale_functions[1]) + theta = temperature.PiecewiseScaling(Tcontrol, theta_vals, values_scale_fn = scale_functions[2]) + tau = temperature.PiecewiseScaling(Tcontrol, tau_vals, values_scale_fn=scale_functions[3]) + + isotropic = hardening.Theta0VoceIsotropicHardeningModel( + tau, + theta + ) + + kinematic = hardening.NoKinematicHardeningModel() + flowrule = flowrules.IsoKinViscoplasticity( + n, + eta, + temperature.ConstantParameter(torch.tensor(0.0, device = device)), + isotropic, + kinematic + ) + model = models.InelasticModel( + E, + flowrule + ) + + return models.ModelIntegrator(model, **kwargs).to(device) + + # Setup initial guesses + names = ["n_vals", "eta_vals", "theta_vals", "tau_vals"] + scale_functions = [ + scaling.BoundedScalingFunction(torch.tensor(1.1, device = device), torch.tensor(15.0, device = device)), + scaling.BoundedScalingFunction(torch.tensor(20.0, device = device), torch.tensor(1000.0, device = device)), + scaling.BoundedScalingFunction(torch.tensor(200.0, device = device), torch.tensor(20000.0, device = device)), + scaling.BoundedScalingFunction(torch.tensor(10.0, device = device), torch.tensor(1000.0, device = device)) + ] + loc_loc_priors = [ + scale_functions[0].unscale(torch.ones(ncontrol, device = device) * 5.0), + scale_functions[1].unscale(torch.ones(ncontrol, device = device) * 500.0), + scale_functions[2].unscale(torch.ones(ncontrol, device = device) * 3000.0), + scale_functions[3].unscale(torch.ones(ncontrol, device = device) * 500.0) + ] + loc_scale_priors = [0.1 * l for l in loc_loc_priors] + scale_scale_priors = [0.1 * l for l in loc_loc_priors] + eps_prior = torch.tensor(5.0, device = device) + + print("Prior values") + for n,l,s1, s2 in zip(names, loc_loc_priors, loc_scale_priors, scale_scale_priors): + print(n) + print("\tloc: %s" % str(l.cpu())) + print("\tloc scale: %s" % str(s1.cpu())) + print("\tscale scale: %s" % str(s2.cpu())) + + # Do a quick check on the reasonableness of the priors + test = optimize.DeterministicModel(lambda *args, **kwargs: make(*args, + scale_functions=scale_functions, block_size = time_chunk_size, **kwargs), + names, loc_loc_priors) + + with torch.no_grad(): + test_results = test(data.to(device), cycles.to(device), types.to(device), + control.to(device)) + plt.plot(data[-1].cpu().numpy(), test_results.cpu().numpy()) + plt.show() + + # Create the actual model + model = optimize.HierarchicalStatisticalModel( + lambda *args, **kwargs: make(*args, scale_functions = scale_functions, + block_size = time_chunk_size, **kwargs), + names, loc_loc_priors, loc_scale_priors, scale_scale_priors, eps_prior + ).to(device) + + # Get the guide + guide = model.make_guide() + + # 5) Setup the optimizer and loss 
+ lr = 1.0e-3 + g = 1.0 + niter = 500 + num_samples = 1 + + optimizer = pyro.optim.ClippedAdam({"lr": lr}) + + ls = pyro.infer.Trace_ELBO(num_particles=num_samples) + + svi = pyro.infer.SVI(model, guide, optimizer, loss=ls) + + # Actually infer + t = tqdm.tqdm(range(niter), total=niter, desc="Loss: ") + loss_hist = [] + for i in t: + loss = svi.step(data.to(device), cycles.to(device), types.to(device), + control.to(device), results.to(device)) + loss_hist.append(loss) + t.set_description("Loss %3.2e" % loss) diff --git a/examples/structural-inference/tension-temperature/survey-data.py b/examples/structural-inference/tension-temperature/survey-data.py new file mode 100755 index 0000000..ed54a43 --- /dev/null +++ b/examples/structural-inference/tension-temperature/survey-data.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 + +""" + Just plot the data to make sure it seems reasonable +""" + +import sys +sys.path.append('../../..') + +import numpy as np +import torch +import xarray as xr +import matplotlib.pyplot as plt + +from pyoptmat import experiments + +# Use doubles +torch.set_default_tensor_type(torch.DoubleTensor) + +if __name__ == "__main__": + input_data = xr.open_dataset("data.nc") + data, results, cycles, types, control = experiments.load_results( + input_data) + + plt.plot(data[-1].numpy(), results.numpy()) + plt.xlabel("Strain (mm/mm)") + plt.ylabel("Stress (MPa)") + plt.show() diff --git a/examples/structural-inference/tension/deterministic/optimize.py b/examples/structural-inference/tension/deterministic/optimize.py index 08aff14..5c65c9d 100755 --- a/examples/structural-inference/tension/deterministic/optimize.py +++ b/examples/structural-inference/tension/deterministic/optimize.py @@ -51,6 +51,9 @@ def make(n, eta, s0, R, d, **kwargs): if __name__ == "__main__": + # Number of vectorized time steps + time_chunk_size = 40 + # 1) Load the data for the variance of interest, # cut down to some number of samples, and flatten scale = 0.01 @@ -73,10 +76,12 @@ def make(n, eta, s0, R, d, **kwargs): print("") # 3) Create the actual model - model = optimize.DeterministicModel(make, names, ics) + model = optimize.DeterministicModel( + lambda *args, **kwargs: make(*args, block_size = time_chunk_size, **kwargs), + names, ics) # 4) Setup the optimizer - niter = 10 + niter = 5 optim = torch.optim.LBFGS(model.parameters()) # 5) Setup the objective function diff --git a/examples/structural-inference/tension/statistical/demo_plot.py b/examples/structural-inference/tension/statistical/demo_plot.py index 6214047..6805d18 100755 --- a/examples/structural-inference/tension/statistical/demo_plot.py +++ b/examples/structural-inference/tension/statistical/demo_plot.py @@ -40,6 +40,9 @@ def make(n, eta, s0, R, d, **kwargs): if __name__ == "__main__": + # Number of vectorized time steps + time_chunk_size = 40 + # 1) Load the data for the variance of interest, # cut down to some number of samples, and flatten scale = 0.05 @@ -54,7 +57,7 @@ def make(n, eta, s0, R, d, **kwargs): names = ["n", "eta", "s0", "R", "d"] sampler = optimize.StatisticalModel( - make, + lambda *args, **kwargs: make(*args, block_size = time_chunk_size, **kwargs), names, [0.50, 0.49, 0.49, 0.48, 0.48], [0.02, 0.02, 0.03, 0.05, 0.05], diff --git a/examples/structural-inference/tension/statistical/infer.py b/examples/structural-inference/tension/statistical/infer.py index 092ea78..3424051 100755 --- a/examples/structural-inference/tension/statistical/infer.py +++ b/examples/structural-inference/tension/statistical/infer.py @@ -52,6 +52,9 @@ 
def make(n, eta, s0, R, d, **kwargs): if __name__ == "__main__": + # Number of vectorized time steps + time_chunk_size = 40 + # 1) Load the data for the variance of interest, # cut down to some number of samples, and flatten scale = 0.05 @@ -84,7 +87,8 @@ def make(n, eta, s0, R, d, **kwargs): # 3) Create the actual model model = optimize.HierarchicalStatisticalModel( - make, names, loc_loc_priors, loc_scale_priors, scale_scale_priors, eps + lambda *args, **kwargs: make(*args, block_size = time_chunk_size, **kwargs), + names, loc_loc_priors, loc_scale_priors, scale_scale_priors, eps ).to(device) # 4) Get the guide diff --git a/pyoptmat/chunktime.py b/pyoptmat/chunktime.py new file mode 100644 index 0000000..fa92ca6 --- /dev/null +++ b/pyoptmat/chunktime.py @@ -0,0 +1,475 @@ +# pylint: disable=abstract-method + +""" + Functions and objects to help with blocked/chunked time integration. + + These include: + 1. Sparse matrix classes for banded systems + 2. General sparse matrix classes + 3. Specialized solver routines working with banded systems +""" + +import warnings + +import torch +import torch.jit +import numpy as np + + +def newton_raphson_chunk(fn, x0, solver, rtol=1e-6, atol=1e-10, miter=100): + """ + Solve a nonlinear system with Newton's method with a tensor for a + BackwardEuler type chunking operator context manager. + + Args: + fn (function): function that returns R, J, and the solver context + x0 (torch.tensor): starting point + solver (ChunkTimeOperatorSolverContext): solver context + + Keyword Args: + rtol (float): nonlinear relative tolerance + atol (float): nonlinear absolute tolerance + miter (int): maximum number of nonlinear iterations + + Returns: + torch.tensor: solution to system of equations + """ + x = x0 + R, J = fn(x) + + nR = torch.norm(R, dim=-1) + nR0 = nR + i = 0 + + while (i < miter) and torch.any(nR > atol) and torch.any(nR / nR0 > rtol): + x -= solver.solve(J, R) + R, J = fn(x) + nR = torch.norm(R, dim=-1) + i += 1 + + if i == miter: + warnings.warn("Implicit solve did not succeed. Results may be inaccurate...") + + return x + + +class BidiagonalOperator(torch.nn.Module): + """ + An object working with a Batched block diagonal operator of the type + + .. 
math:: + + \\begin{bmatrix} + A_1 & 0 & 0 & 0 & \\cdots & 0\\\\ + B_1 & A_2 & 0 & 0 & \\cdots & 0\\\\ + 0 & B_2 & A_3 & 0 & \\cdots & 0\\\\ + \\vdots & \\vdots & \\ddots & \\ddots & \\ddots & \\vdots \\\\ + 0 & 0 & 0 & B_{n-2} & A_{n-1} & 0\\\\ + 0 & 0 & 0 & 0 & B_{n-1} & A_n + \\end{bmatrix} + + that is, a blocked banded system with the main + diagonal and the first lower diagonal filled + + We use the following sizes: + - nblk: number of blocks in the square matrix + - sblk: size of each block + - sbat: batch size + + Args: + A (torch.tensor): tensor of shape (nblk,sbat,sblk,sblk) + storing the nblk main diagonal blocks + B (torch.tensor): tensor of shape (nblk-1,sbat,sblk,sblk) + storing the nblk-1 off diagonal blocks + """ + + def __init__(self, A, B, *args, **kwargs): + super().__init__(*args, **kwargs) + self.A = A + self.B = B + self.nblk = A.shape[0] + self.sbat = A.shape[1] + self.sblk = A.shape[2] + + @property + def dtype(self): + """ + dtype, which is just the dtype of self.diag + """ + return self.A.dtype + + @property + def device(self): + """ + device, which is just the device of self.diag + """ + return self.A.device + + @property + def n(self): + """ + Size of the unbatched square matrix + """ + return self.nblk * self.sblk + + @property + def shape(self): + """ + Logical shape of the dense array + """ + return (self.sbat, self.n, self.n) + + +class BidiagonalThomasFactorization(BidiagonalOperator): + """ + Manages the data needed to solve our bidiagonal system via Thomas + factorization + + Args: + A (torch.tensor): tensor of shape (nblk,sbat,sblk,sblk) with the main diagonal + B (torch.tensor): tensor of shape (nblk-1,sbat,sblk,sblk) with the off diagonal + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self._setup_factorization() + + def forward(self, v): + """ + Complete the backsolve for a given right hand side + + Args: + v (torch.tensor): tensor of shape (sbat, sblk*nblk) + """ + y = torch.empty_like(v) + i = 0 + s = self.sblk + y[:, i * s : (i + 1) * s] = torch.linalg.lu_solve( + self.lu[i], self.pivots[i], v[:, i * s : (i + 1) * s].unsqueeze(-1) + ).squeeze(-1) + # The .clone() here really makes no sense to me, but torch assures me it is necessary + for i in range(1, self.nblk): + y[:, i * s : (i + 1) * s] = torch.linalg.lu_solve( + self.lu[i], + self.pivots[i], + v[:, i * s : (i + 1) * s].unsqueeze(-1) + - torch.bmm( + self.B[i - 1], y[:, (i - 1) * s : i * s].clone().unsqueeze(-1) + ), + ).squeeze(-1) + + return y + + def _setup_factorization(self): + """ + Form the factorization... + + Args: + diag (torch.tensor): diagonal blocks of shape (nblk, sbat, sblk, sblk) + """ + self.lu, self.pivots, _ = torch.linalg.lu_factor_ex(self.A) + + +class BidiagonalForwardOperator(BidiagonalOperator): + """ + A batched block banded matrix of the form: + + .. 
math:: + + \\begin{bmatrix} + A_1 & 0 & 0 & 0 & \\cdots & 0\\\\ + B_1 & A_2 & 0 & 0 & \\cdots & 0\\\\ + 0 & B_2 & A_3 & 0 & \\cdots & 0\\\\ + \\vdots & \\vdots & \\ddots & \\ddots & \\ddots & \\vdots \\\\ + 0 & 0 & 0 & B_{n-2} & A_{n-1} & 0\\\\ + 0 & 0 & 0 & 0 & B_{n-1} & A_n + \\end{bmatrix} + + that is, a blocked banded system with the main + diagonal and first lower block diagonal filled + + We use the following sizes: + - nblk: number of blocks in the square matrix + - sblk: size of each block + - sbat: batch size + + Args: + A (torch.tensor): tensor of shape (nblk,sbat,sblk,sblk) + storing the nblk diagonal blocks + B (torch.tensor): tensor of shape (nblk-1,sbat,sblk,sblk) + storing the nblk-1 off diagonal blocks + """ + + def to_diag(self): + """ + Convert to a SquareBatchedBlockDiagonalMatrix, for testing + or legacy purposes + """ + return SquareBatchedBlockDiagonalMatrix([self.A, self.B], [0, -1]) + + def forward(self, v): + """ + :math:`A \\cdot v` in an efficient manner + + Args: + v (torch.tensor): batch of vectors + """ + # Reshaped v + vp = ( + v.view(self.sbat, self.nblk, self.sblk) + .transpose(0, 1) + .reshape(self.sbat * self.nblk, self.sblk) + .unsqueeze(-1) + ) + + # Actual calculation + b = torch.bmm(self.A.view(-1, self.sblk, self.sblk), vp) + b[self.sbat :] += torch.bmm( + self.B.view(-1, self.sblk, self.sblk), vp[: -self.sbat] + ) + + return ( + b.squeeze(-1) + .view(self.nblk, self.sbat, self.sblk) + .transpose(1, 0) + .flatten(start_dim=1) + ) + + def inverse(self): + """ + Return an inverse operator + """ + return BidiagonalThomasFactorization(self.A, self.B) + + +class ChunkTimeOperatorSolverContext: + """ + Context manager for solving sparse chunked time systems + + Args: + solve_method: one of "dense" or "direct" + """ + + def __init__(self, solve_method): + + if solve_method not in ["dense", "direct"]: + raise ValueError("Solve method must be one of dense or direct") + self.solve_method = solve_method + + def solve(self, J, R): + """ + Actually solve Jx = R + + Args: + J (BidiagonalForwardOperator): matrix operator + R (torch.tensor): right hand side + """ + if self.solve_method == "dense": + return self.solve_dense(J, R) + if self.solve_method == "direct": + return self.solve_direct(J, R) + raise RuntimeError("Unknown solver method...") + + def solve_dense(self, J, R): + """ + Highly inefficient solve where we first convert to a dense tensor + + Args: + J (BidiagonalForwardOperator): matrix operator + R (torch.tensor): right hand side + """ + return torch.linalg.solve_ex(J.to_diag().to_dense(), R)[0] + + def solve_direct(self, J, R): + """ + Solve with a direct factorization + + Args: + J (BidiagonalForwardOperator): matrix operator + R (torch.tensor): right hand side + """ + M = J.inverse() + return M(R) + + +class SquareBatchedBlockDiagonalMatrix: + """ + A batched block diagonal matrix of the type + + .. math:: + + \\begin{bmatrix} + A_1 & B_1 & 0 & 0\\\\ + C_1 & A_2 & B_2 & 0 \\\\ + 0 & C_2 & A_3 & B_3\\\\ + 0 & 0 & C_3 & A_4 + \\end{bmatrix} + + where the matrix has diagonal blocks of non-zeros and + can have arbitrary numbers of filled diagonals + + Additionally, this matrix is batched. + + We use the following sizes: + - nblk: number of blocks in the each direction + - sblk: size of each block + - sbat: batch size + + Args: + data (list of tensors): list of tensors of length ndiag. 
+ Each tensor + has shape :code:`(nblk-abs(d),sbat,sblk,sblk)` + where d is the diagonal number + provided in the next input + diags (list of ints): list of ints of length ndiag. + Each entry gives the diagonal + for the data in the corresponding + tensor. These values d can + range from -(n-1) to (n-1) + """ + + def __init__(self, data, diags): + # We will want this in order later + iargs = np.argsort(diags) + + self.data = [data[i] for i in iargs] + self.diags = [diags[i] for i in iargs] + + self.nblk = self.data[0].shape[0] + abs(self.diags[0]) + self.sbat = self.data[0].shape[1] + self.sblk = self.data[0].shape[-1] + + @property + def dtype(self): + """ + dtype, as reported by the first entry in self.data + """ + return self.data[0].dtype + + @property + def device(self): + """ + device, as reported by the first entry in self.device + """ + return self.data[0].device + + @property + def n(self): + """ + Size of the unbatched square matrix + """ + return self.nblk * self.sblk + + @property + def shape(self): + """ + Logical shape of the dense array + """ + return (self.sbat, self.n, self.n) + + @property + def nnz(self): + """ + Number of logical non-zeros (not counting the batch dimension) + """ + return sum( + self.data[i].shape[0] * self.sblk * self.sblk + for i in range(len(self.diags)) + ) + + def to_dense(self): + """ + Convert the representation to a dense tensor + """ + A = torch.zeros(*self.shape, dtype=self.dtype, device=self.device) + + # There may be a more clever way than for loops, but for now + for d, data in zip(self.diags, self.data): + for k in range(self.nblk - abs(d)): + if d <= 0: + i = k - d + j = k + else: + i = k + j = k + d + A[ + :, + i * self.sblk : (i + 1) * self.sblk, + j * self.sblk : (j + 1) * self.sblk, + ] = data[k] + + return A + + def to_batched_coo(self): + """ + Convert to a torch sparse batched COO tensor + + This is done in a weird way. torch recognizes "batch" dimensions at + the start of the tensor and "dense" dimensions at the end (with "sparse" + dimensions in between). batch dimensions can/do have difference indices, + dense dimensions all share the same indices. We have the latter situation + so this is setup as a tensor with no "batch" dimensions, 2 "sparse" dimensions, + and 1 "dense" dimension. So it will be the transpose of the shape of the + to_dense function. + """ + inds = torch.zeros(2, self.nnz) + data = torch.zeros(self.nnz, self.sbat, dtype=self.dtype, device=self.device) + + # Order doesn't matter, nice! 
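+        # Walk each stored diagonal block by block: every block contributes a
+        # chunk of sblk*sblk entries.  roffset/coffset shift the block onto its
+        # (row, column) position in the logical matrix, and the batch dimension
+        # rides along as the trailing "dense" dimension of the values tensor.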
+ c = 0 + chunk = self.sblk * self.sblk + for d, bdata in zip(self.diags, self.data): + for i in range(bdata.shape[0]): + data[c : c + chunk] = bdata[i].flatten(start_dim=1).t() + + offset = (i + abs(d)) * self.sblk + + if d < 0: + roffset = offset + coffset = i * self.sblk + else: + roffset = i * self.sblk + coffset = offset + + inds[0, c : c + chunk] = ( + torch.repeat_interleave( + torch.arange( + 0, self.sblk, dtype=torch.int64, device=self.device + ).unsqueeze(-1), + self.sblk, + -1, + ).flatten() + + roffset + ) + inds[1, c : c + chunk] = ( + torch.repeat_interleave( + torch.arange( + 0, self.sblk, dtype=torch.int64, device=self.device + ).unsqueeze(0), + self.sblk, + 0, + ).flatten() + + coffset + ) + + c += chunk + + return torch.sparse_coo_tensor( + inds, + data, + dtype=self.dtype, + device=self.device, + size=(self.n, self.n, self.sbat), + ).coalesce() + + def to_unrolled_csr(self): + """ + Return a list of CSR tensors with length equal to the batch size + + """ + coo = self.to_batched_coo() + return [ + torch.sparse_coo_tensor(coo.indices(), coo.values()[:, i]).to_sparse_csr() + for i in range(self.sbat) + ] diff --git a/pyoptmat/flowrules.py b/pyoptmat/flowrules.py index c268690..b3839dd 100644 --- a/pyoptmat/flowrules.py +++ b/pyoptmat/flowrules.py @@ -58,7 +58,7 @@ def dflow_dhist(self, s, h, t, T, e): torch.tensor: derivative of flow rate with respect to the internal variables """ - return torch.zeros(s.shape + (1,) + h.shape[1:], device=h.device) + return torch.zeros(s.shape + (1,) + h.shape[-1:], device=h.device) def dflow_derate(self, s, h, t, T, e): """ @@ -78,7 +78,7 @@ def dflow_derate(self, s, h, t, T, e): torch.tensor: derivative of flow rate with respect to the internal variables """ - return torch.zeros(s.shape + e.shape[1:], device=h.device) + return torch.zeros(s.shape, device=h.device) def dhist_dstress(self, s, h, t, T, e): """ @@ -97,7 +97,7 @@ def dhist_dstress(self, s, h, t, T, e): Returns: torch.tensor: derivative of flow rate with respect to the stress """ - return torch.zeros(h.shape + s.shape[1:], device=h.device) + return torch.zeros(h.shape, device=h.device) def dhist_derate(self, s, h, t, T, e): """ @@ -116,7 +116,7 @@ def dhist_derate(self, s, h, t, T, e): Returns: torch.tensor: derivative of flow rate with respect to the strain rate """ - return torch.zeros(h.shape + e.shape[1:], device=h.device) + return torch.zeros(h.shape, device=h.device) class KocksMeckingRegimeFlowRule(FlowRule): @@ -539,7 +539,10 @@ def blend_values(self, vals1, vals2, T, e): T (torch.tensor): temperatures e (torch.tensor): strain rates """ - f = self.f(T, e)[(...,) + (None,) * (vals1.dim() - 1)] + f = self.f(T, e) + # Ugh, really? 
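+        # Pad f with trailing singleton axes so it broadcasts against vals1
+        # regardless of how many leading (time chunk / batch) dimensions vals1 carries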
+ diff = vals1.dim() - f.dim() + f = f[(...,) + (None,) * diff] return (1 - f) * vals1 + f * vals2 @@ -800,7 +803,7 @@ def dflow_dhist(self, s, h, t, T, e): Returns: torch.tensor: the derivative of the flow rate """ - return self.base.dflow_dhist(s, h, t, T, e) * self.scale(e)[:, None, None] + return self.base.dflow_dhist(s, h, t, T, e) * self.scale(e)[..., None, None] def dflow_derate(self, s, h, t, T, e): """ @@ -841,7 +844,7 @@ def history_rate(self, s, h, t, T, e): rate, drate = self.base.history_rate(s, h, t, T, e) sf = self.scale(e) - return sf[:, None] * rate, sf[:, None, None] * drate + return sf[..., None] * rate, sf[..., None, None] * drate def dhist_dstress(self, s, h, t, T, e): """ @@ -857,7 +860,7 @@ def dhist_dstress(self, s, h, t, T, e): Returns: torch.tensor: the derivative of the flow rate """ - return self.scale(e)[:, None] * self.base.dhist_dstress(s, h, t, T, e) + return self.scale(e)[..., None] * self.base.dhist_dstress(s, h, t, T, e) def dhist_derate(self, s, h, t, T, e): """ @@ -874,8 +877,8 @@ def dhist_derate(self, s, h, t, T, e): torch.tensor: the derivative of the flow rate """ return ( - self.base.dhist_derate(s, h, t, T, e) * self.scale(e)[:, None] - + self.base.history_rate(s, h, t, T, e)[0] * self.dscale(e)[:, None] + self.base.dhist_derate(s, h, t, T, e) * self.scale(e)[..., None] + + self.base.history_rate(s, h, t, T, e)[0] * self.dscale(e)[..., None] ) @@ -930,7 +933,7 @@ def flow_rate(self, s, h, t, T, e): drate = torch.zeros_like(s) for o, n, model in zip(self.offsets, self.nhist_per, self.models): - ratei, dratei = model.flow_rate(s, h[:, o : o + n], t, T, e) + ratei, dratei = model.flow_rate(s, h[..., o : o + n], t, T, e) rate += ratei drate += dratei @@ -950,11 +953,11 @@ def dflow_dhist(self, s, h, t, T, e): Returns: torch.tensor: the derivative of the flow rate """ - res = torch.zeros(s.shape[:1] + (1,) + h.shape[1:], device=h.device) + res = torch.zeros(s.shape + (1,) + h.shape[-1:], device=h.device) for o, n, model in zip(self.offsets, self.nhist_per, self.models): - dratei = model.dflow_dhist(s, h[:, o : o + n], t, T, e) - res[:, :, o : o + n] = dratei + dratei = model.dflow_dhist(s, h[..., o : o + n], t, T, e) + res[..., :, o : o + n] = dratei return res @@ -976,7 +979,7 @@ def dflow_derate(self, s, h, t, T, e): res = torch.zeros(e.shape, device=h.device) for o, n, model in zip(self.offsets, self.nhist_per, self.models): - dratei = model.dflow_derate(s, h[:, o : o + n], t, T, e) + dratei = model.dflow_derate(s, h[..., o : o + n], t, T, e) res += dratei return res @@ -1002,9 +1005,9 @@ def history_rate(self, s, h, t, T, e): drate = torch.zeros(h.shape + h.shape[-1:], device=h.device) for o, n, model in zip(self.offsets, self.nhist_per, self.models): - ratei, dratei = model.history_rate(s, h[:, o : o + n], t, T, e) - rate[:, o : o + n] = ratei - drate[:, o : o + n, o : o + n] = dratei + ratei, dratei = model.history_rate(s, h[..., o : o + n], t, T, e) + rate[..., o : o + n] = ratei + drate[..., o : o + n, o : o + n] = dratei return (rate, drate) @@ -1022,11 +1025,11 @@ def dhist_dstress(self, s, h, t, T, e): Returns: torch.tensor: the derivative of the flow rate """ - res = torch.zeros(h.shape + s.shape[1:], device=h.device) + res = torch.zeros(h.shape, device=h.device) for o, n, model in zip(self.offsets, self.nhist_per, self.models): - dratei = model.dhist_dstress(s, h[:, o : o + n], t, T, e) - res[:, o : o + n] = dratei + dratei = model.dhist_dstress(s, h[..., o : o + n], t, T, e) + res[..., o : o + n] = dratei return res @@ -1044,11 +1047,11 
@@ def dhist_derate(self, s, h, t, T, e): Returns: torch.tensor: the derivative of the flow rate """ - res = torch.zeros(h.shape + e.shape[1:], device=h.device) + res = torch.zeros(h.shape, device=h.device) for o, n, model in zip(self.offsets, self.nhist_per, self.models): - dratei = model.dhist_derate(s, h[:, o : o + n], t, T, e) - res[:, o : o + n] = dratei + dratei = model.dhist_derate(s, h[..., o : o + n], t, T, e) + res[..., o : o + n] = dratei return res @@ -1176,8 +1179,8 @@ def flow_rate(self, s, h, t, T, e): tuple(torch.tensor, torch.tensor): the flow rate and the derivative of the flow rate with respect to stress """ - ih = self.isotropic.value(h[:, : self.isotropic.nhist]) - kh = self.kinematic.value(h[:, self.isotropic.nhist :]) + ih = self.isotropic.value(h[..., : self.isotropic.nhist]) + kh = self.kinematic.value(h[..., self.isotropic.nhist :]) return ( utility.macaulay((torch.abs(s - kh) - self.s0(T) - ih) / self.eta(T)) @@ -1203,9 +1206,9 @@ def dflow_diso(self, s, h, t, T, e): Returns: torch.tensor: the derivative of the flow rate """ - ih = self.isotropic.value(h[:, : self.isotropic.nhist]) + ih = self.isotropic.value(h[..., : self.isotropic.nhist]) kh = self.kinematic.value( - h[:, self.isotropic.nhist : self.isotropic.nhist + self.kinematic.nhist] + h[..., self.isotropic.nhist : self.isotropic.nhist + self.kinematic.nhist] ) iv = (torch.abs(s - kh) - self.s0(T) - ih) / self.eta(T) @@ -1231,9 +1234,9 @@ def dflow_dkin(self, s, h, t, T, e): Returns: torch.tensor: the derivative of the flow rate """ - ih = self.isotropic.value(h[:, : self.isotropic.nhist]) + ih = self.isotropic.value(h[..., : self.isotropic.nhist]) kh = self.kinematic.value( - h[:, self.isotropic.nhist : self.isotropic.nhist + self.kinematic.nhist] + h[..., self.isotropic.nhist : self.isotropic.nhist + self.kinematic.nhist] ) return ( @@ -1275,51 +1278,49 @@ def history_rate(self, s, h, t, T, e): hdiv = torch.zeros(h.shape + h.shape[-1:], device=h.device) erate, _ = self.flow_rate(s, h, t, T, e) - hiso = h[:, : self.isotropic.nhist] - hkin = h[:, self.isotropic.nhist :] + hiso = h[..., : self.isotropic.nhist] + hkin = h[..., self.isotropic.nhist :] - hrate[:, : self.isotropic.nhist] = self.isotropic.history_rate( + hrate[..., : self.isotropic.nhist] = self.isotropic.history_rate( s, hiso, t, erate, T, e ) - hrate[:, self.isotropic.nhist :] = self.kinematic.history_rate( + hrate[..., self.isotropic.nhist :] = self.kinematic.history_rate( s, hkin, t, erate, T, e ) # History partials hdiv[ - :, : self.isotropic.nhist, : self.isotropic.nhist + ..., : self.isotropic.nhist, : self.isotropic.nhist ] = self.isotropic.dhistory_rate_dhistory(s, hiso, t, erate, T, e) hdiv[ - :, self.isotropic.nhist :, self.isotropic.nhist : + ..., self.isotropic.nhist :, self.isotropic.nhist : ] = self.kinematic.dhistory_rate_dhistory(s, hkin, t, erate, T, e) # Strain rate components - hdiv[:, : self.isotropic.nhist, : self.isotropic.nhist] += torch.matmul( + hdiv[..., : self.isotropic.nhist, : self.isotropic.nhist] += utility.mbmm( self.isotropic.dhistory_rate_derate(s, hiso, t, erate, T, e), - torch.matmul( - self.dflow_diso(s, h, t, T, e)[:, None, None], - self.isotropic.dvalue(hiso)[:, None, :], + utility.mbmm( + self.dflow_diso(s, h, t, T, e)[..., None, None], + self.isotropic.dvalue(hiso)[..., None, :], ), ) - hdiv[:, : self.isotropic.nhist, self.isotropic.nhist :] += torch.matmul( - self.isotropic.dhistory_rate_derate(s, hiso, t, erate, T, e), - torch.matmul( - self.dflow_dkin(s, h, t, T, e)[:, None, None], - 
self.kinematic.dvalue(hkin)[:, None, :], - ), + hdiv[..., : self.isotropic.nhist, self.isotropic.nhist :] += ( + self.isotropic.dhistory_rate_derate(s, hiso, t, erate, T, e) + * self.dflow_dkin(s, h, t, T, e)[..., None, None] + * self.kinematic.dvalue(hkin)[..., None, :] ) - hdiv[:, self.isotropic.nhist :, : self.isotropic.nhist] += torch.matmul( + hdiv[..., self.isotropic.nhist :, : self.isotropic.nhist] += utility.mbmm( self.kinematic.dhistory_rate_derate(s, hkin, t, erate, T, e), - torch.matmul( - self.dflow_diso(s, h, t, T, e)[:, None, None], - self.isotropic.dvalue(hiso)[:, None, :], + utility.mbmm( + self.dflow_diso(s, h, t, T, e)[..., None, None], + self.isotropic.dvalue(hiso)[..., None, :], ), ) - hdiv[:, self.isotropic.nhist :, self.isotropic.nhist :] += torch.matmul( + hdiv[..., self.isotropic.nhist :, self.isotropic.nhist :] += utility.mbmm( self.kinematic.dhistory_rate_derate(s, hkin, t, erate, T, e), - torch.matmul( - self.dflow_dkin(s, h, t, T, e)[:, None, None], - self.kinematic.dvalue(hkin)[:, None, :], + utility.mbmm( + self.dflow_dkin(s, h, t, T, e)[..., None, None], + self.kinematic.dvalue(hkin)[..., None, :], ), ) @@ -1339,16 +1340,16 @@ def dflow_dhist(self, s, h, t, T, e): Returns: torch.tensor: the derivative of the flow rate """ - res = torch.zeros(s.shape[:1] + (1,) + h.shape[1:], device=h.device) + res = torch.zeros(s.shape + (1,) + h.shape[-1:], device=h.device) - hiso = h[:, : self.isotropic.nhist] - hkin = h[:, self.isotropic.nhist :] + hiso = h[..., : self.isotropic.nhist] + hkin = h[..., self.isotropic.nhist :] - res[:, 0, : self.isotropic.nhist] = self.dflow_diso(s, h, t, T, e)[ - :, None + res[..., 0, : self.isotropic.nhist] = self.dflow_diso(s, h, t, T, e)[ + ..., None ] * self.isotropic.dvalue(hiso) - res[:, 0, self.isotropic.nhist :] = self.dflow_dkin(s, h, t, T, e)[ - :, None + res[..., 0, self.isotropic.nhist :] = self.dflow_dkin(s, h, t, T, e)[ + ..., None ] * self.kinematic.dvalue(hkin) return res @@ -1367,23 +1368,25 @@ def dhist_dstress(self, s, h, t, T, e): Returns: torch.tensor: the derivative of the flow rate """ - res = torch.zeros(h.shape + s.shape[1:], device=h.device) + res = torch.zeros(h.shape, device=h.device) erate, derate = self.flow_rate(s, h, t, T, e) - hiso = h[:, : self.isotropic.nhist] - hkin = h[:, self.isotropic.nhist :] + hiso = h[..., : self.isotropic.nhist] + hkin = h[..., self.isotropic.nhist :] - res[:, : self.isotropic.nhist] = ( + res[..., : self.isotropic.nhist] = ( self.isotropic.dhistory_rate_dstress(s, hiso, t, erate, T, e) - + self.isotropic.dhistory_rate_derate(s, hiso, t, erate, T, e).bmm( - derate[..., None, None] + + utility.mbmm( + self.isotropic.dhistory_rate_derate(s, hiso, t, erate, T, e), + derate[..., None, None], )[..., 0] ) - res[:, self.isotropic.nhist :] = ( + res[..., self.isotropic.nhist :] = ( self.kinematic.dhistory_rate_dstress(s, hkin, t, erate, T, e) - + self.kinematic.dhistory_rate_derate(s, hkin, t, erate, T, e).bmm( - derate[..., None, None] + + utility.mbmm( + self.kinematic.dhistory_rate_derate(s, hkin, t, erate, T, e), + derate[..., None, None], )[..., 0] ) @@ -1403,17 +1406,17 @@ def dhist_derate(self, s, h, t, T, e): Returns: torch.tensor: derivative of flow rate with respect to the strain rate """ - res = torch.zeros(h.shape + e.shape[1:], device=h.device) + res = torch.zeros(h.shape, device=h.device) erate, _ = self.flow_rate(s, h, t, T, e) - hiso = h[:, : self.isotropic.nhist] - hkin = h[:, self.isotropic.nhist :] + hiso = h[..., : self.isotropic.nhist] + hkin = h[..., 
self.isotropic.nhist :] - res[:, : self.isotropic.nhist] = self.isotropic.dhistory_rate_dtotalrate( + res[..., : self.isotropic.nhist] = self.isotropic.dhistory_rate_dtotalrate( s, hiso, t, erate, T, e ) - res[:, self.isotropic.nhist :] = self.kinematic.dhistory_rate_dtotalrate( + res[..., self.isotropic.nhist :] = self.kinematic.dhistory_rate_dtotalrate( s, hkin, t, erate, T, e ) diff --git a/pyoptmat/hardening.py b/pyoptmat/hardening.py index 59dcafe..c0f713e 100644 --- a/pyoptmat/hardening.py +++ b/pyoptmat/hardening.py @@ -92,7 +92,7 @@ def value(self, h): Returns: torch.tensor: the isotropic hardening value """ - return h[:, 0] + return h[..., 0] def dvalue(self, h): """ @@ -105,7 +105,7 @@ def dvalue(self, h): torch.tensor: the derivative of the isotropic hardening value with respect to the internal variables """ - return torch.ones((h.shape[0], 1), device=h.device) + return torch.ones(h.shape[:-1] + (1,), device=h.device) @property def nhist(self): @@ -129,7 +129,7 @@ def history_rate(self, s, h, t, ep, T, e): Returns: torch.tensor: internal variable rate """ - return torch.unsqueeze(self.d(T) * (self.R(T) - h[:, 0]) * torch.abs(ep), 1) + return torch.unsqueeze(self.d(T) * (self.R(T) - h[..., 0]) * torch.abs(ep), -1) def dhistory_rate_dstress(self, s, h, t, ep, T, e): """ @@ -163,12 +163,9 @@ def dhistory_rate_dhistory(self, s, h, t, ep, T, e): Returns: torch.tensor: derivative with respect to history """ - return torch.unsqueeze( - -torch.unsqueeze(self.d(T), -1) - * torch.ones_like(h) - * torch.abs(ep)[:, None], - 1, - ) + return (-self.d(T) * torch.ones_like(h[..., 0]) * torch.abs(ep))[ + ..., None, None + ] def dhistory_rate_derate(self, s, h, t, ep, T, e): """ @@ -186,9 +183,7 @@ def dhistory_rate_derate(self, s, h, t, ep, T, e): Returns: torch.tensor: derivative with respect to the inelastic rate """ - return torch.unsqueeze( - torch.unsqueeze(self.d(T) * (self.R(T) - h[:, 0]) * torch.sign(ep), 1), 1 - ) + return (self.d(T) * (self.R(T) - h[..., 0]) * torch.sign(ep))[..., None, None] class Theta0VoceIsotropicHardeningModel(IsotropicHardeningModel): @@ -225,7 +220,7 @@ def value(self, h): Returns: torch.tensor: the isotropic hardening value """ - return h[:, 0] + return h[..., 0] def dvalue(self, h): """ @@ -238,7 +233,7 @@ def dvalue(self, h): Returns: torch.tensor: the isotropic hardening value """ - return torch.ones((h.shape[0], 1), device=h.device) + return torch.ones(h.shape[:-1] + (1,), device=h.device) @property def nhist(self): @@ -263,7 +258,7 @@ def history_rate(self, s, h, t, ep, T, e): torch.tensor: internal variable rate """ return torch.unsqueeze( - self.theta(T) * (1.0 - h[:, 0] / self.tau(T)) * torch.abs(ep), 1 + self.theta(T) * (1.0 - h[..., 0] / self.tau(T)) * torch.abs(ep), -1 ) def dhistory_rate_dstress(self, s, h, t, ep, T, e): @@ -298,12 +293,9 @@ def dhistory_rate_dhistory(self, s, h, t, ep, T, e): Returns: torch.tensor: derivative with respect to history """ - return torch.unsqueeze( - -torch.unsqueeze(self.theta(T) / self.tau(T), -1) - * torch.ones_like(h) - * torch.abs(ep)[:, None], - 1, - ) + return ( + -self.theta(T) / self.tau(T) * torch.ones_like(h[..., 0]) * torch.abs(ep) + )[..., None, None] def dhistory_rate_derate(self, s, h, t, ep, T, e): """ @@ -321,12 +313,9 @@ def dhistory_rate_derate(self, s, h, t, ep, T, e): Returns: torch.tensor: derivative with respect to the inelastic rate """ - return torch.unsqueeze( - torch.unsqueeze( - self.theta(T) * (1.0 - h[:, 0] / self.tau(T)) * torch.sign(ep), 1 - ), - 1, - ) + return (self.theta(T) * (1.0 - 
h[..., 0] / self.tau(T)) * torch.sign(ep))[ + ..., None, None + ] class Theta0RecoveryVoceIsotropicHardeningModel(IsotropicHardeningModel): @@ -364,7 +353,7 @@ def value(self, h): Args: h: the vector of internal variables for this model """ - return h[:, 0] + return h[..., 0] def dvalue(self, h): """ @@ -373,7 +362,7 @@ def dvalue(self, h): Args: h: the vector of internal variables for this model """ - return torch.ones((h.shape[0], 1), device=h.device) + return torch.ones(h.shape[:-1] + (1,), device=h.device) @property def nhist(self): @@ -395,11 +384,11 @@ def history_rate(self, s, h, t, ep, T, e): e (torch.tensor): total strain rate """ return torch.unsqueeze( - self.theta(T) * (1.0 - h[:, 0] / self.tau(T)) * torch.abs(ep) + self.theta(T) * (1.0 - h[..., 0] / self.tau(T)) * torch.abs(ep) + self.r1(T) - * (self.R0(T) - h[:, 0]) - * torch.abs(self.R0(T) - h[:, 0]) ** (self.r2(T) - 1.0), - 1, + * (self.R0(T) - h[..., 0]) + * torch.abs(self.R0(T) - h[..., 0]) ** (self.r2(T) - 1.0), + -1, ) def dhistory_rate_dstress(self, s, h, t, ep, T, e): @@ -431,17 +420,11 @@ def dhistory_rate_dhistory(self, s, h, t, ep, T, e): recovery = ( self.r2(T) * self.r1(T) - * torch.abs(self.R0(T) - h[:, 0]) ** (self.r2(T) - 1.0) - )[:, None, None] + * torch.abs(self.R0(T) - h[..., 0]) ** (self.r2(T) - 1.0) + )[..., None, None] return ( - torch.unsqueeze( - -torch.unsqueeze(self.theta(T) / self.tau(T), -1) - * torch.ones_like(h) - * torch.abs(ep)[:, None], - 1, - ) - - recovery - ) + -self.theta(T) / self.tau(T) * torch.ones_like(h[..., 0]) * torch.abs(ep) + )[..., None, None] - recovery def dhistory_rate_derate(self, s, h, t, ep, T, e): """ @@ -456,12 +439,9 @@ def dhistory_rate_derate(self, s, h, t, ep, T, e): T: temperature e (torch.tensor): total strain rate """ - return torch.unsqueeze( - torch.unsqueeze( - self.theta(T) * (1.0 - h[:, 0] / self.tau(T)) * torch.sign(ep), 1 - ), - 1, - ) + return (self.theta(T) * (1.0 - h[..., 0] / self.tau(T)) * torch.sign(ep))[ + ..., None, None + ] class KinematicHardeningModel(HardeningModel): @@ -498,7 +478,7 @@ def value(self, h): Args: h: vector of internal variables """ - return torch.zeros(h.shape[0], device=h.device) + return torch.zeros(h.shape[:-1], device=h.device) def dvalue(self, h): """ @@ -508,7 +488,7 @@ def dvalue(self, h): Args: h: vector of internal variables """ - return torch.zeros(h.shape[0], 0, device=h.device) + return torch.zeros(h.shape[:-1] + (0,), device=h.device) def history_rate(self, s, h, t, ep, T, e): """ @@ -554,7 +534,7 @@ def dhistory_rate_dhistory(self, s, h, t, ep, T, e): T: temperature e (torch.tensor): total strain rate """ - return torch.empty(h.shape[0], 0, 0, device=h.device) + return torch.empty(h.shape[:-1] + (0, 0), device=h.device) def dhistory_rate_derate(self, s, h, t, ep, T, e): """ @@ -571,7 +551,7 @@ def dhistory_rate_derate(self, s, h, t, ep, T, e): T: temperature e (torch.tensor): total strain rate """ - return torch.empty(h.shape[0], 0, 1, device=h.device) + return torch.empty(h.shape[:-1] + (0, 1), device=h.device) class FAKinematicHardeningModel(KinematicHardeningModel): @@ -622,7 +602,7 @@ def value(self, h): Returns: torch.tensor: the kinematic hardening value """ - return h[:, 0] + return h[..., 0] def dvalue(self, h): """ @@ -635,7 +615,7 @@ def dvalue(self, h): torch.tensor: the derivative of the kinematic hardening value with respect to the internal variables """ - return torch.ones((h.shape[0], 1), device=h.device) + return torch.ones(h.shape[:-1] + (1,), device=h.device) @property def nhist(self): @@ -660,10 
+640,10 @@ def history_rate(self, s, h, t, ep, T, e): torch.tensor: internal variable rate """ return torch.unsqueeze( - self.C(T) * ep - - self.g(T) * h[:, 0] * torch.abs(ep) - - self.b(T) * torch.abs(h[:, 0]) ** (self.r(T) - 1.0) * h[:, 0], - 1, + self.C(T) * torch.ones_like(h[..., 0]) * ep + - self.g(T) * h[..., 0] * torch.abs(ep) + - self.b(T) * torch.abs(h[..., 0]) ** (self.r(T) - 1.0) * h[..., 0], + -1, ) def dhistory_rate_dstress(self, s, h, t, ep, T, e): @@ -699,11 +679,9 @@ def dhistory_rate_dhistory(self, s, h, t, ep, T, e): torch.tensor: derivative with respect to history """ return ( - torch.unsqueeze(-self.g(T)[..., None] * torch.abs(ep)[:, None], 1) - - (self.b(T) * self.r(T) * torch.abs(h)[:, 0] ** (self.r(T) - 1.0))[ - :, None, None - ] - ) + -self.g(T) * torch.ones_like(h[..., 0]) * torch.abs(ep) + - (self.b(T) * self.r(T) * torch.abs(h[..., 0]) ** (self.r(T) - 1.0)) + )[..., None, None] def dhistory_rate_derate(self, s, h, t, ep, T, e): """ @@ -721,10 +699,7 @@ def dhistory_rate_derate(self, s, h, t, ep, T, e): Returns: torch.tensor: derivative with respect to the inelastic rate """ - return torch.unsqueeze( - torch.unsqueeze(self.C(T) - self.g(T) * h[:, 0] * torch.sign(ep), 1), - 1, - ) + return (self.C(T) - self.g(T) * h[..., 0] * torch.sign(ep))[..., None, None] class ChabocheHardeningModel(KinematicHardeningModel): @@ -770,7 +745,7 @@ def value(self, h): Returns: torch.tensor: the kinematic hardening value """ - return torch.sum(h, 1) + return torch.sum(h, -1) def dvalue(self, h): """ @@ -783,7 +758,7 @@ def dvalue(self, h): torch.tensor: the derivative of the kinematic hardening value with respect to the internal variables """ - return torch.ones((h.shape[0], self.nback), device=h.device) + return torch.ones(h.shape[:-1] + (self.nback,), device=h.device) @property def nhist(self): @@ -808,8 +783,8 @@ def history_rate(self, s, h, t, ep, T, e): torch.tensor: internal variable rate """ return ( - self.C(T)[None, ...] * ep[:, None] - - self.g(T)[None, ...] * h * torch.abs(ep)[:, None] + self.C(T)[None, ...] * ep[..., None] + - self.g(T)[None, ...] * h * torch.abs(ep)[..., None] ).reshape(h.shape) def dhistory_rate_dstress(self, s, h, t, ep, T, e): @@ -844,9 +819,9 @@ def dhistory_rate_dhistory(self, s, h, t, ep, T, e): Returns: torch.tensor: derivative with respect to history """ - return torch.diag_embed(-self.g(T)[None, ...] * torch.abs(ep)[:, None]).reshape( - h.shape + h.shape[1:] - ) + return torch.diag_embed( + -self.g(T)[None, ...] * torch.abs(ep)[..., None] + ).reshape(h.shape + (self.nback,)) def dhistory_rate_derate(self, s, h, t, ep, T, e): """ @@ -865,8 +840,8 @@ def dhistory_rate_derate(self, s, h, t, ep, T, e): torch.tensor: derivative with respect to the inelastic rate """ return torch.unsqueeze( - self.C(T)[None, ...] * torch.ones_like(ep)[:, None] - - self.g(T)[None, :] * h * torch.sign(ep)[:, None], + self.C(T)[None, ...] * torch.ones_like(ep)[..., None] + - self.g(T)[None, ...] 
* h * torch.sign(ep)[..., None], -1, ).reshape(h.shape + (1,)) @@ -922,7 +897,7 @@ def value(self, h): Returns: torch.tensor: the kinematic hardening value """ - return torch.sum(h, 1) + return torch.sum(h, -1) def dvalue(self, h): """ @@ -935,7 +910,7 @@ def dvalue(self, h): torch.tensor: the derivative of the kinematic hardening value with respect to the internal variables """ - return torch.ones((h.shape[0], self.nback), device=h.device) + return torch.ones(h.shape[:-1] + (self.nback,), device=h.device) @property def nhist(self): @@ -960,8 +935,8 @@ def history_rate(self, s, h, t, ep, T, e): torch.tensor: internal variable rate """ return ( - self.C(T)[None, ...] * ep[:, None] - - self.g(T)[None, ...] * h * torch.abs(ep)[:, None] + self.C(T)[None, ...] * ep[..., None] + - self.g(T)[None, ...] * h * torch.abs(ep)[..., None] - self.b(T)[None, ...] * torch.abs(h) ** (self.r(T)[None, ...] - 1.0) * h ).reshape(h.shape) @@ -997,14 +972,14 @@ def dhistory_rate_dhistory(self, s, h, t, ep, T, e): Returns: torch.tensor: derivative with respect to history """ - return torch.diag_embed(-self.g(T)[None, ...] * torch.abs(ep)[:, None]).reshape( - h.shape + h.shape[1:] - ) + torch.diag_embed( + return torch.diag_embed( + -self.g(T)[None, ...] * torch.abs(ep)[..., None] + ).reshape(h.shape + (self.nback,)) + torch.diag_embed( -self.b(T)[None, ...] * self.r(T)[None, ...] * torch.abs(h) ** (self.r(T)[None, ...] - 1.0) ).reshape( - h.shape + h.shape[1:] + h.shape + (self.nback,) ) def dhistory_rate_derate(self, s, h, t, ep, T, e): @@ -1024,8 +999,8 @@ def dhistory_rate_derate(self, s, h, t, ep, T, e): torch.tensor: derivative with respect to the inelastic rate """ return torch.unsqueeze( - self.C(T)[None, ...] * torch.ones_like(ep)[:, None] - - self.g(T)[None, :] * h * torch.sign(ep)[:, None], + self.C(T)[None, ...] * torch.ones_like(ep)[..., None] + - self.g(T)[None, ...] 
* h * torch.sign(ep)[..., None], -1, ).reshape(h.shape + (1,)) @@ -1058,9 +1033,10 @@ def value(self, h): Returns: torch.tensor: the kinematic hardening value """ - v = torch.zeros(h.shape[0], device=h.device) + v = torch.zeros(h.shape[:-1], device=h.device) for o, n, model in zip(self.offsets, self.nhist_per, self.models): - v += model.value(h[:, o : o + n]) + v += model.value(h[..., o : o + n]) + return v def dvalue(self, h): @@ -1074,9 +1050,9 @@ def dvalue(self, h): torch.tensor: the derivative of the kinematic hardening value with respect to the internal variables """ - dv = torch.zeros((h.shape[0], self.nhist), device=h.device) + dv = torch.zeros(h.shape[:-1] + (self.nhist,), device=h.device) for o, n, model in zip(self.offsets, self.nhist_per, self.models): - dv[:, o : o + n] = model.dvalue(h[:, o : o + n]) + dv[..., o : o + n] = model.dvalue(h[..., o : o + n]) return dv @@ -1104,7 +1080,7 @@ def history_rate(self, s, h, t, ep, T, e): """ hr = torch.zeros_like(h) for o, n, model in zip(self.offsets, self.nhist_per, self.models): - hr[:, o : o + n] = model.history_rate(s, h[:, o : o + n], t, ep, T, e) + hr[..., o : o + n] = model.history_rate(s, h[..., o : o + n], t, ep, T, e) return hr @@ -1125,8 +1101,8 @@ def dhistory_rate_dstress(self, s, h, t, ep, T, e): """ dhr = torch.zeros_like(h) for o, n, model in zip(self.offsets, self.nhist_per, self.models): - dhr[:, o : o + n] = model.dhistory_rate_dstress( - s, h[:, o : o + n], t, ep, T, e + dhr[..., o : o + n] = model.dhistory_rate_dstress( + s, h[..., o : o + n], t, ep, T, e ) return dhr @@ -1146,10 +1122,10 @@ def dhistory_rate_dhistory(self, s, h, t, ep, T, e): Returns: torch.tensor: derivative with respect to history """ - dhr = torch.zeros(h.shape[0], self.nhist, self.nhist, device=s.device) + dhr = torch.zeros(h.shape[:-1] + (self.nhist, self.nhist), device=s.device) for o, n, model in zip(self.offsets, self.nhist_per, self.models): - dhr[:, o : o + n, o : o + n] = model.dhistory_rate_dhistory( - s, h[:, o : o + n], t, ep, T, e + dhr[..., o : o + n, o : o + n] = model.dhistory_rate_dhistory( + s, h[..., o : o + n], t, ep, T, e ) return dhr @@ -1172,8 +1148,8 @@ def dhistory_rate_derate(self, s, h, t, ep, T, e): """ dhr = torch.zeros(h.shape + (1,), device=s.device) for o, n, model in zip(self.offsets, self.nhist_per, self.models): - dhr[:, o : o + n] = model.dhistory_rate_derate( - s, h[:, o : o + n], t, ep, T, e + dhr[..., o : o + n, 0:1] = model.dhistory_rate_derate( + s, h[..., o : o + n], t, ep, T, e ) return dhr @@ -1196,8 +1172,8 @@ def dhistory_rate_dtotalrate(self, s, h, t, ep, T, e): """ dhr = torch.zeros(h.shape, device=s.device) for o, n, model in zip(self.offsets, self.nhist_per, self.models): - dhr[:, o : o + n] = model.dhistory_rate_dtotalrate( - s, h[:, o : o + n], t, ep, T, e + dhr[..., o : o + n] = model.dhistory_rate_dtotalrate( + s, h[..., o : o + n], t, ep, T, e ) return dhr diff --git a/pyoptmat/models.py b/pyoptmat/models.py index 4e0ad52..2cf0d93 100644 --- a/pyoptmat/models.py +++ b/pyoptmat/models.py @@ -84,9 +84,9 @@ def forward(self, t, y, erate, T): d_y_dot_d_erate:(nbatch,1+nhist+1) Jacobian wrt the strain rate d_y_dot_d_T: (nbatch,1+nhist+1) derivative wrt temperature (unused) """ - stress = y[:, 0].clone() - h = y[:, 1 : 1 + self.flowrule.nhist].clone() - d = y[:, -1].clone() + stress = y[..., 0].clone() + h = y[..., 1 : 1 + self.flowrule.nhist].clone() + d = y[..., -1].clone() frate, dfrate = self.flowrule.flow_rate(stress / (1 - d), h, t, T, erate) hrate, dhrate = 
self.flowrule.history_rate(stress / (1 - d), h, t, T, erate) @@ -101,49 +101,49 @@ def forward(self, t, y, erate, T): ) result = torch.empty_like(y, device=y.device) - dresult = torch.zeros(y.shape + y.shape[1:], device=y.device) + dresult = torch.zeros(y.shape + y.shape[-1:], device=y.device) - result[:, 0] = self.E(T) * (erate - frate_p) - result[:, 1 : 1 + self.flowrule.nhist] = hrate - result[:, -1] = drate + result[..., 0] = self.E(T) * (erate - frate_p) + result[..., 1 : 1 + self.flowrule.nhist] = hrate + result[..., -1] = drate - dresult[:, 0, 0] = -self.E(T) * dfrate_p + dresult[..., 0, 0] = -self.E(T) * dfrate_p - dresult[:, 0:1, 1 : 1 + self.flowrule.nhist] = ( + dresult[..., 0:1, 1 : 1 + self.flowrule.nhist] = ( -self.E(T)[..., None, None] - * (1 - d)[:, None, None] + * (1 - d)[..., None, None] * self.flowrule.dflow_dhist(stress / (1 - d), h, t, T, erate) ) - dresult[:, 0, -1] = self.E(T) * (frate - dfrate * stress / (1 - d)) + dresult[..., 0, -1] = self.E(T) * (frate - dfrate * stress / (1 - d)) - dresult[:, 1 : 1 + self.flowrule.nhist, 0] = ( + dresult[..., 1 : 1 + self.flowrule.nhist, 0] = ( self.flowrule.dhist_dstress(stress / (1 - d), h, t, T, erate) - / (1 - d)[:, None] + / (1 - d)[..., None] ) - dresult[:, 1 : 1 + self.flowrule.nhist, 1 : 1 + self.flowrule.nhist] = dhrate - dresult[:, 1 : 1 + self.flowrule.nhist, -1] = ( + dresult[..., 1 : 1 + self.flowrule.nhist, 1 : 1 + self.flowrule.nhist] = dhrate + dresult[..., 1 : 1 + self.flowrule.nhist, -1] = ( self.flowrule.dhist_dstress(stress / (1 - d), h, t, T, erate) - * stress[:, None] - / (1 - d[:, None]) ** 2 + * stress[..., None] + / (1 - d[..., None]) ** 2 ) - dresult[:, -1, 0] = self.dmodel.d_damage_rate_d_s( + dresult[..., -1, 0] = self.dmodel.d_damage_rate_d_s( stress / (1 - d), d, t, T, erate ) / (1 - d) # d_damage_d_hist is zero - dresult[:, -1, -1] = ddrate + dresult[..., -1, -1] = ddrate # Calculate the derivative wrt the strain rate, used in inverting drate = torch.zeros_like(y) - drate[:, 0] = self.E(T) * ( + drate[..., 0] = self.E(T) * ( 1.0 - (1 - d) * self.flowrule.dflow_derate(stress / (1 - d), h, t, T, erate) - self.dmodel.d_damage_rate_d_e(stress / (1 - d), d, t, T, erate) * stress ) - drate[:, 1 : 1 + self.flowrule.nhist] = self.flowrule.dhist_derate( + drate[..., 1 : 1 + self.flowrule.nhist] = self.flowrule.dhist_derate( stress / (1 - d), h, t, T, erate ) - drate[:, -1] = self.dmodel.d_damage_rate_d_e(stress / (1 - d), d, t, T, erate) + drate[..., -1] = self.dmodel.d_damage_rate_d_e(stress / (1 - d), d, t, T, erate) # Logically we should return the derivative wrt T, but right now # we're not going to use it @@ -159,13 +159,10 @@ class ModelIntegrator(nn.Module): Args: model: base strain-controlled model - substeps (optional): subdivide each provided timestep into multiple steps to - reduce integration error, defaults to 1 method (optional): integrate method used to solve the equations, defaults to `"backward-euler"` rtol (optional): relative tolerance for implicit integration atol (optional): absolute tolerance for implicit integration - progress (optional): print a progress bar for forward time integration miter (optional): maximum nonlinear iterations for implicit time integration d0 (optional): intitial value of damage use_adjoint (optional): if `True` use the adjoint approach to @@ -175,38 +172,32 @@ class ModelIntegrator(nn.Module): in the adjoint calculation. 
Used if not all the parameters can be determined by introspection - jit_mode (optional): if true use the JIT mode which cuts out - error checking and fixes sizes """ def __init__( self, model, *args, - substeps=1, method="backward-euler", + block_size=1, rtol=1.0e-6, atol=1.0e-4, - progress=False, miter=100, d0=0, use_adjoint=True, extra_params=None, - jit_mode=False, **kwargs ): super().__init__(*args, **kwargs) self.model = model - self.substeps = substeps self.method = method + self.block_size = block_size self.rtol = rtol self.atol = atol - self.progress = progress self.miter = miter self.d0 = d0 self.use_adjoint = use_adjoint self.extra_params = extra_params - self.jit_mode = jit_mode if self.use_adjoint: self.imethod = ode.odeint_adjoint @@ -232,9 +223,9 @@ def solve_both(self, times, temperatures, idata, control): # Likely if this happens dt = 0 rates[torch.isnan(rates)] = 0 - rate_interpolator = utility.CheaterBatchTimeSeriesInterpolator(times, rates) - base_interpolator = utility.CheaterBatchTimeSeriesInterpolator(times, idata) - temperature_interpolator = utility.CheaterBatchTimeSeriesInterpolator( + rate_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator(times, rates) + base_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator(times, idata) + temperature_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( times, temperatures ) @@ -255,14 +246,12 @@ def solve_both(self, times, temperatures, idata, control): bmodel, init, times, + block_size=self.block_size, method=self.method, - substep=self.substeps, rtol=self.rtol, atol=self.atol, - progress=self.progress, miter=self.miter, extra_params=self.extra_params, - jit_mode=self.jit_mode, ) def solve_strain(self, times, strains, temperatures): @@ -287,10 +276,10 @@ def solve_strain(self, times, strains, temperatures): # Likely if this happens dt = 0 strain_rates[torch.isnan(strain_rates)] = 0 - erate_interpolator = utility.CheaterBatchTimeSeriesInterpolator( + erate_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( times, strain_rates ) - temperature_interpolator = utility.CheaterBatchTimeSeriesInterpolator( + temperature_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( times, temperatures ) @@ -307,14 +296,12 @@ def solve_strain(self, times, strains, temperatures): emodel, init, times, + block_size=self.block_size, method=self.method, - substep=self.substeps, rtol=self.rtol, atol=self.atol, - progress=self.progress, miter=self.miter, extra_params=self.extra_params, - jit_mode=self.jit_mode, ) def solve_stress(self, times, stresses, temperatures): @@ -339,13 +326,13 @@ def solve_stress(self, times, stresses, temperatures): # Likely if this happens dt = 0 stress_rates[torch.isnan(stress_rates)] = 0 - stress_rate_interpolator = utility.CheaterBatchTimeSeriesInterpolator( + stress_rate_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( times, stress_rates ) - stress_interpolator = utility.CheaterBatchTimeSeriesInterpolator( + stress_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( times, stresses ) - temperature_interpolator = utility.CheaterBatchTimeSeriesInterpolator( + temperature_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( times, temperatures ) @@ -365,14 +352,12 @@ def solve_stress(self, times, stresses, temperatures): smodel, init, times, + block_size=self.block_size, method=self.method, - substep=self.substeps, rtol=self.rtol, atol=self.atol, - progress=self.progress, miter=self.miter, extra_params=self.extra_params, - jit_mode=self.jit_mode, ) 
def forward(self, t, y): @@ -429,12 +414,12 @@ def forward(self, t, y): e_control = self.control == 0 s_control = self.control == 1 - actual_rates[e_control] = strain_rates[e_control] - actual_rates[s_control] = stress_rates[s_control] + actual_rates[..., e_control, :] = strain_rates[..., e_control, :] + actual_rates[..., s_control, :] = stress_rates[..., s_control, :] actual_jacs = torch.zeros_like(strain_jacs) - actual_jacs[e_control] = strain_jacs[e_control] - actual_jacs[s_control] = stress_jacs[s_control] + actual_jacs[..., e_control, :, :] = strain_jacs[..., e_control, :, :] + actual_jacs[..., s_control, :, :] = stress_jacs[..., s_control, :, :] return actual_rates, actual_jacs @@ -497,28 +482,28 @@ def forward(self, t, y): cs = self.stress_fn(t) cT = self.T_fn(t) - erate_guess = torch.zeros_like(y[:, 0])[:, None] + erate_guess = torch.zeros_like(y[..., 0])[..., None] def RJ(erate): yp = y.clone() - yp[:, 0] = cs - ydot, _, Je, _ = self.model(t, yp, erate[:, 0], cT) + yp[..., 0] = cs + ydot, _, Je, _ = self.model(t, yp, erate[..., 0], cT) - R = ydot[:, 0] - csr - J = Je[:, 0] + R = ydot[..., 0] - csr + J = Je[..., 0] - return R[:, None], J[:, None, None] + return R[..., None], J[..., None, None] erate, _ = solvers.newton_raphson(RJ, erate_guess) yp = y.clone() - yp[:, 0] = cs - ydot, J, Je, _ = self.model(t, yp, erate[:, 0], cT) + yp[..., 0] = cs + ydot, J, Je, _ = self.model(t, yp, erate[..., 0], cT) # Rescale the jacobian - J[:, 0, :] = -J[:, 0, :] / Je[:, 0][:, None] - J[:, :, 0] = 0 + J[..., 0, :] = -J[..., 0, :] / Je[..., 0][..., None] + J[..., :, 0] = 0 # Insert the strain rate - ydot[:, 0] = erate[:, 0] + ydot[..., 0] = erate[..., 0] return ydot, J diff --git a/pyoptmat/neuralode.py b/pyoptmat/neuralode.py new file mode 100644 index 0000000..d96a9e6 --- /dev/null +++ b/pyoptmat/neuralode.py @@ -0,0 +1,65 @@ +"""Helpers for neural ODEs + +Contains miscellaneous helper classes to make working with neural ODEs +easier in pyoptmat. +""" + +import torch + +from pyoptmat import utility + + +class NeuralODE(torch.nn.Module): + """Simple wrapper for simple forced neural ODEs + + Mathematically these define a model of the type + + .. math:: + + \\dot{y} = m(f(t), y) + + where :math:`y` is the state and :math:`f` is some forcing + function. + + The neural model is :math:`m` here, which takes a concatenated + combination of :math:`y` and :math:`f(t)` as input and outputs + :math:`\\dot{y}`. + + If dim(y) is n and dim(f(t)) is s then the neural network must + take n+s inputs and produce n outputs. + + This class handles arbitrary leading batch dimensions and also + using torch AD to calculate the (batched) Jacobian for implicit + integration schemes. 
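+
+    A rough usage sketch (the network sizes and the forcing function below are
+    only illustrative):
+
+    .. code-block:: python
+
+        force = lambda t: torch.sin(t).unsqueeze(-1)             # s = 1
+        net = torch.nn.Sequential(
+            torch.nn.Linear(3, 10), torch.nn.Tanh(), torch.nn.Linear(10, 2)
+        )                                                        # n = 2
+        nmodel = NeuralODE(net, force)
+        ydot, J = nmodel(t, y)   # y: (..., 2) -> ydot: (..., 2), J: (..., 2, 2)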
+ + Args: + model (callable): neural function + force (callable): forcing function + """ + + def __init__(self, model, force, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.model = model + self.force = force + + # Wrapped function to calculate jacobian + @utility.jacobianize() + def model_and_jacobian(x): + return self.model(x) + + self.m_and_j = model_and_jacobian + + def forward(self, t, y): + """Forward call returns rate and Jacobian + + Args: + t (torch.tensor): current times + y (torch.tensor): current state + """ + f = self.force(t) + x = torch.cat((y, f), dim=-1) + + val, jac = self.m_and_j(x) + + return val, jac[0][..., : -f.shape[-1]] diff --git a/pyoptmat/ode.py b/pyoptmat/ode.py index 4429e29..84aae1e 100644 --- a/pyoptmat/ode.py +++ b/pyoptmat/ode.py @@ -1,630 +1,470 @@ """ - Module defining the key objects and functions to integrate ODEs - and provide the sensitivity of the results with respect to the - model parameters either through backpropogation AD or the adjoint method. - - The key functions are :py:func:`pyoptmat.odeint` and - :py:func:`pyoptmat.odeint_adjoint`. These functions both - integrate a system of ODEs forward in time using one of several methods - and have identical function signature. The only difference is that - backward passes over the results of :py:func:`pyoptmat.ode.odeint` will - use PyTorch backpropogation AD to return the gradients while - :py:func:`pyoptmat.ode.odeint_adjoint` will use the adjoint method. - - The two different backward pass options are seamless, the end-user - can interchange them freely with other PyTorch code stacking on top - of the integrated results and obtain gradient information just as if they - were using AD. - - In all realistic cases explored so far the adjoint method produces faster - results with far less memory use and should be preferred versus - the AD variant. - - The methods currently available to solve ODEs are detailed below, but - as a summary: - - * "forward_euler": simple forward Euler explicit integration - * "backward_euler": simple backward Euler implicit integration - - The current methods all accept a :code:`substep` option which will - subdivide the provided time intervals into some number of subdivisions - to decrease integration error. + Module defining the key objects and functions to integrate ODEs + and provide the sensitivity of the results with respect to the + model parameters either through backpropogation AD or the adjoint method. + + The key functions are :py:func:`pyoptmat.odeint` and + :py:func:`pyoptmat.odeint_adjoint`. These functions both + integrate a system of ODEs forward in time using one of several methods + and have identical function signature. The only difference is that + backward passes over the results of :py:func:`pyoptmat.ode.odeint` will + use PyTorch backpropogation AD to return the gradients while + :py:func:`pyoptmat.ode.odeint_adjoint` will use the adjoint method. + + The two different backward pass options are seamless, the end-user + can interchange them freely with other PyTorch code stacking on top + of the integrated results and obtain gradient information just as if they + were using AD. + + In all realistic cases explored so far the adjoint method produces faster + results with far less memory use and should be preferred versus + the AD variant. 
+ + The methods currently available to solve ODEs are detailed below, but + as a summary: + + * "forward_euler": simple forward Euler explicit integration + * "backward_euler": simple backward Euler implicit integration + + Both options take a `block_size` parameter, which sets up + vectorized/parallelized time integration. This means the execution + device integrates `block_size` time steps at once, rather than + one at a time. This *greatly* accelerates time integration, particularly + on GPUs. The optimal `block_size` parameter will vary based on your + system and set of ODEs, but we highly recommend determining an + optimal block size for your system and not leaving it set to the + default value of 1. """ - import torch -from pyoptmat import solvers, utility - - -def linear(t0, t1, y0, y1, t): - """ - Helper function for linear interpolation between two points - - .. math:: +from pyoptmat import chunktime - z = \\frac{y_1-y_0}{t_1-t_0} \\left( t - t_0 \\right) - Args: - t0 (torch.tensor): first "x" point - t1 (torch.tensor): second "x" point - y0 (torch.tensor): first "y" point - y1 (torch.tensor): second "y" point - t (torch.tensor): target "x" point - - Returns: - torch.tensor: interpolated values +class BackwardEulerScheme: """ - n = y0.dim() - 1 - return ( - y0 - + (y1 - y0) / (t1 - t0)[(...,) + (None,) * n] * (t - t0)[(...,) + (None,) * n] - ) - - -class FixedGridSolver: + Integration with the backward Euler method """ - Superclass of all solvers that use a fixed grid (versus an adaptive method) - - Args: - func (function): function returning the time rate of change and, - optionally, the jacobian - y0 (torch.tensor): initial condition - - Keyword Args: - substep (int): subdivide each provided timestep into some number - of subdivisions for integration. Default is `None` - (i.e. no subdivision) - jit_mode (bool): if true do various dangerous things to fix the - model structure. Default is :code:`False` - """ - - def __init__(self, func, y0, adjoint_params=None, **kwargs): - # Store basic info about the system - self.func = func - self.y0 = y0 - - self.batch_size = self.y0.shape[0] - self.prob_size = self.y0.shape[1] - - self.substep = kwargs.pop("substep", None) - self.jit_mode = kwargs.pop("jit_mode", False) - - # Store for later - self.adjoint_params = adjoint_params - - # Sort out if the function is providing the jacobian - self.has_jac = True - - # Whether the method uses the current or previous step - self.use_curr = False - - # Cached information for backward pass - self.full_result = [] - self.full_jacobian = [] - self.full_times = [] - - if not self.jit_mode: - fake_time = torch.zeros(self.batch_size, device=y0.device) - try: - a, b = self.func(fake_time, self.y0) - except ValueError: - self.has_jac = False - if not self.has_jac: - a = self.func(fake_time, self.y0) - if a.dim() != 2: - raise ValueError( - "ODE solvers require batched functions returning (nbatch, nvars)!" 
- ) - - if self.has_jac and ((a.shape + a.shape[1:]) != b.shape): - raise ValueError("Function returns Jacobian of the wrong shape!") - - if not self.has_jac: - raise ValueError("This implementation requires a hard coded Jacobian!") - - def _get_param_partial(self, t, y, og): + def form_operators(self, dy, yd, yJ, dt): """ - Get the partial derivative of the state with respect to the parameters - in the adjoint_params at an arbitrary point, dotted with the - output_gradient + Form the residual and sparse Jacobian of the batched system Args: - t (torch.tensor): time you want - y (torch.tensor): state you want - og (torch.tensor): thing to dot with + dy (torch.tensor): (ntime+1, nbatch, nsize) tensor with the increments in y + yd (torch.tensor): (ntime, nbatch, nsize) tensor giving the ODE rate + yJ (torch.tensor): (ntime, nbatch, nsize, nsize) tensor giving the derivative + of the ODE + dt (torch.tensor): (ntime, nbatch) tensor giving the time step Returns: - torch.tensor: derivative dot product - """ - with torch.enable_grad(): - ydot = self.func(t, y)[0] - # Retain graph seems to be needed for solvers like L-BFGS - # which go twice through the function - return torch.autograd.grad(ydot, self.adjoint_params, og, retain_graph=True) - - def _construct_grid(self, t): - """ - Construct the subidivided grid for substepped problems. - - The subdivided grid adds :code:`self.substep` extra steps - between each point in the original grid :code:`t`. - - Args: - t (torch.tensor): initial timesteps - - Returns: - torch.tensor: subdivided grid - """ - if self.substep and self.substep > 1: - nshape = list(t.shape) - nshape[0] = (nshape[0] - 1) * self.substep - - grid = torch.empty(nshape, device=t.device) - incs = torch.linspace(0, 1, self.substep + 1, device=t.device)[:-1] - - i = 0 - for t1, t2 in zip(t[:-1], t[1:]): - grid[i : i + self.substep] = ( - incs * (t2 - t1).unsqueeze(-1) + t1.unsqueeze(-1) - ).T - i += self.substep - - grid[-1] = t[-1] - - return grid - - return t - - def integrate(self, t, cache_adjoint=False): - """ - Main method: actually integrate through the times :code:`t`. 
- - Args: - t (torch.tensor): timesteps to report results at - - Keyword Args: - cache_adjoint (bool): store the info we'll need for the - adjoint pass, default `False` - - Returns: - torch.tensor: integrated results at times :code:`t` - """ - # Basic error checking - if not self.jit_mode: - if t.dim() != 2 or t.shape[1] != self.batch_size: - raise ValueError("Expected times to be a ntime x batch_size array!") - - # Construct the substepped grid - times = self._construct_grid(t) - - # Setup "real" results - result = torch.empty( - t.shape[0], *self.y0.shape, dtype=self.y0.dtype, device=self.y0.device + R (torch.tensor): residual tensor of shape + (nbatch, ntime * nsize) + J (tensor.tensor): sparse Jacobian tensor of logical + size (nbatch, ntime*nsize, ntime*nsize) + """ + # Basic shape info + ntime = dy.shape[0] - 1 + prob_size = dy.shape[-1] + batch_size = dy.shape[-2] + + # Form the overall residual + # R = dy_k - dy_{k-1} - ydot(y) * dt + # However, to unsqueeze it we need to combine the 0th and the 2nd dimension + R = ( + (dy[1:] - dy[:-1] - yd[1:] * dt.unsqueeze(-1)) + .transpose(0, 1) + .flatten(start_dim=1) ) - result[0] = self.y0 - # Setup "cached" results, if required - if cache_adjoint: - self.full_result = torch.empty( - times.shape[0], - *self.y0.shape, - dtype=self.y0.dtype, - device=self.y0.device, - ) - self.full_result[0] = self.y0 - self.full_jacobian = torch.empty( - times.shape[0], - self.batch_size, - self.prob_size, - self.prob_size, - dtype=self.y0.dtype, - device=self.y0.device, - ) - self.full_times = times - - # Start integrating! - y0 = self.y0 - j = 1 - for k, (t0, t1) in enumerate(zip(times[:-1], times[1:])): - y1, J = self._step(t0, t1, t1 - t0, y0) - - # Save the full set of results and the Jacobian, if required for backward pass - if cache_adjoint: - self.full_result[k + 1] = y1 - self.full_jacobian[k + 1] = J - - # Interpolate to results point, if requested - if torch.any(t1 >= t[j]): # This could be slow - result[j] = linear(t0, t1, y0, y1, t[j]) - j += 1 - - y0 = y1 - - result[-1] = y1 # There may be a more elegant way of doing this... 
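For reference, the operator assembled here is block bidiagonal: (I - J_k * dt_k) blocks on the main block diagonal and -I blocks on the first sub-diagonal. A small sketch of the operator interface, with toy sizes assumed and mirroring its use in test/test_chunktime.py:

import torch
from pyoptmat import chunktime

torch.set_default_dtype(torch.float64)

ntime, nbatch, nsize = 4, 2, 3
I = torch.eye(nsize).expand(ntime, nbatch, nsize, nsize)
J_dt = torch.rand(ntime, nbatch, nsize, nsize) / 10.0  # stand-in for yJ * dt

# (I - J*dt) on the block diagonal, -I on the sub-diagonal
A = chunktime.BidiagonalForwardOperator(I - J_dt, -I[1:])

b = torch.rand(nbatch, ntime * nsize)
# Operator-vector product on the flattened (nbatch, ntime*nsize) layout ...
mv = A(b)
# ... agrees with the equivalent dense matrix
dense = A.to_diag().to_dense().matmul(b.unsqueeze(-1)).squeeze(-1)
assert torch.allclose(mv, dense)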
- - return result - - def rewind_adjoint(self, times, output_grad): - """ - Rewind the solve the get the partials via the adjoint method, dotted - with the :code:`output_grad` + # Form the overall jacobian + # This has I-J blocks on the main block diagonal and -I on the -1 block diagonal + I = torch.eye(prob_size, device=dt.device).expand(ntime, batch_size, -1, -1) - Args: - times (torch.tensor): output times required by the solver - output_grad (torch.tensor): dot product tensor - """ - # Start from *end* of the output_grad -- going backwards - l0 = output_grad[-1] - # Useful for indexing - nt = self.full_times.shape[0] - # Gradient results - grad_result = tuple( - torch.zeros(p.shape, device=times.device) for p in self.adjoint_params + return R, chunktime.BidiagonalForwardOperator( + I - yJ[1:] * dt.unsqueeze(-1).unsqueeze(-1), -I[1:] ) - # Target output time - j = times.shape[0] - 2 - - # Run *backwards* through time - for curr in range(nt - 2, -1, -1): - # Setup all the state so I don't get confused - last = curr + 1 # Very confusing lol - tcurr = self.full_times[curr] - tlast = self.full_times[last] - dt = tlast - tcurr # I define this to be positive - # Take the adjoint step - l1 = self._adjoint_update(dt, self.full_jacobian[last], l0) - - # Accumulate the gradients - # The l0, l1 thing confuses me immensely, but is in fact correct - # Note where the dt goes, this is to avoid branching later when summing - if self.use_curr: - p = self._get_param_partial( - tcurr, self.full_result[curr], l0 * dt[:, None] - ) - else: - p = self._get_param_partial( - tlast, self.full_result[last], l1 * dt[:, None] - ) - grad_result = self._accumulate(grad_result, p) - - # Only increment if we've hit an observation point - if torch.any(tcurr <= times[j]): - l1 = linear(tcurr, tlast, l1, l0, times[j]) - l0 = l1 + output_grad[j] - j -= 1 - else: - l0 = l1 - - return grad_result - - def _step(self, t0, t1, dt, y0): + def update_adjoint(self, dt, J, a_prev, grads): """ - Actual "take a step" method - - Return the Jacobian even if it isn't used for the adjoint method + Update the adjoint for a block of time Args: - t0 (torch.tensor): previous time - t1 (torch.tensor): next time - dt (torch.tensor): time increment - y0 (torch.tensor): previous state + dt (torch.tensor): block of time steps + J (torch.tensor): block of Jacobians + a_prev (torch.tensor): previous adjoint + grads (torch.tensor): block of gradient values Returns: - torch.tensor: state at time :code:`t1` + adjoint_block (torch.tensor): block of updated adjoint values """ - raise NotImplementedError("_step not implemented in base class!") + ntime = J.shape[0] - 1 + prob_size = J.shape[2] + batch_size = J.shape[1] - def _adjoint_update(self, dt, J, llast): - """ - Update the *adjoint problem* to the next step + adjoint_block = torch.zeros(J.shape[:-1], dtype=J.dtype, device=J.device) + adjoint_block[0] = a_prev - The confusing thing here is that we must use the :math:`t_{n+1}` - Jacobian as that's what we did on the forward pass, even though that's - sort of backwards for the actual backward Euler method + # Invert J all at once + I = torch.eye(prob_size, device=dt.device).expand(ntime, batch_size, -1, -1) + lu, pivot, _ = torch.linalg.lu_factor_ex( + I + J[:-1].transpose(-1, -2) * dt.unsqueeze(-1).unsqueeze(-1) + ) - Args: - dt (torch.tensor): time step (defined positive!) 
- J (torch.tensor): cached Jacobian value -- - actually :math:`\\frac{d\\dot{y}}{dy}` for this - method - llast (torch.tensor): last value of the adjoint + # This has to be done sequentially... + for i in range(ntime): + adjoint_block[i + 1] = torch.linalg.lu_solve( + lu[i], pivot[i], adjoint_block[i].unsqueeze(-1) + ).squeeze(-1) + adjoint_block[i + 1] += grads[i + 1] - Returns: - torch.tensor: updated value of the adjoint at the "last" time step - """ - raise NotImplementedError("_adjoint_update not implemented in base class!") + return adjoint_block - def _accumulate(self, grad_results, partial): + def accumulate(self, prev, time, y, a, grad, grad_fn): """ - Update the *parameter gradient* to the next step + Calculate the accumulated value given the results at each time step Args: - grad_results (torch.tensor): previous value of the gradient results - partial (torch.tensor): value of the parameter partials * dt + prev (tuple of tensors): previous results + times (tensor): (ntime+1, nbatch) tensor of times + y (tensor): (ntime+1, nbatch, nsize) tensor of state + a (tensor): (ntime+1, nbatch, nsize) tensor of adjoints + grad (tensor): (ntime+1, nbatch, nsize) tensor of gradient values + grad_fn (function): function that takes t,y,a and returns the dot product with + the model parameters Returns: - tuple of torch.tensor: updated parameter gradients at the "next" step + next (tuple of tensor): updated gradients """ - raise NotImplementedError("_accumulated not implemented in base class!") + dt = time.diff(dim=0) + g = grad_fn(time[:-1], y[:-1], (a[1:] - grad[1:]) * -dt.unsqueeze(-1)) + return tuple(pi + gi for pi, gi in zip(prev, g)) -class ExplicitSolver(FixedGridSolver): +class ForwardEulerScheme: """ - Superclass of all explicit solvers + Integration with the forward Euler method """ - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.use_curr = True # Which location it needs the partial via autodiff - - def _adjoint_update(self, dt, J, llast): - """ - Update the *adjoint problem* to the next step - - The confusing thing here is that we must use the :math:`t_{n+1}` - Jacobian as that's what we did on the forward pass, even though that's - sort of backwards for the actual backward Euler method - - Args: - dt (torch.tensor): time step (defined positive!) 
- J (torch.tensor): cached Jacobian value -- - actually :math:`\\frac{d\\dot{y}}{dy}` for this - method - llast (torch.tensor): last value of the adjoint - - Returns: - torch.tensor: updated value of the adjoint at the "last" time step - """ - raise NotImplementedError("_adjoint_update not implemented in base class!") - - def _accumulate(self, grad_results, partial): + def form_operators(self, dy, yd, yJ, dt): """ - Update the *parameter gradient* to the next step + Form the residual and sparse Jacobian of the batched system Args: - grad_results (torch.tensor): previous value of the gradient results - partial (torch.tensor): value of the parameter partials * dt + dy (torch.tensor): (ntime+1, nbatch, nsize) tensor with the increments in y + yd (torch.tensor): (ntime, nbatch, nsize) tensor giving the ODE rate + yJ (torch.tensor): (ntime, nbatch, nsize, nsize) tensor giving the derivative + of the ODE + dt (torch.tensor): (ntime, nbatch) tensor giving the time step Returns: - tuple of torch.tensor: updated parameter gradients at the "next" step - """ - raise NotImplementedError("_accumulated not implemented in base class!") + R (torch.tensor): residual tensor of shape + (nbatch, ntime * nsize) + J (tensor.tensor): sparse Jacobian tensor of logical size + (nbatch, ntime*nsize, ntime*nsize) + """ + # Basic shape info + ntime = dy.shape[0] - 1 + prob_size = dy.shape[-1] + batch_size = dy.shape[-2] + + # Form the overall residual + # R = dy_k - dy_{k-1} - ydot(y) * dt + # However, to unsqueeze it we need to combine the 0th and the 2nd dimension + R = ( + (dy[1:] - dy[:-1] - yd[:-1] * dt.unsqueeze(-1)) + .transpose(0, 1) + .flatten(start_dim=1) + ) + # Form the overall jacobian + # This has I on the diagonal and -I - J*dt on the off diagonal + I = torch.eye(prob_size, device=dt.device).expand(ntime, batch_size, -1, -1) -class ForwardEuler(ExplicitSolver): - """ - Basic forward Euler integration - """ + return R, chunktime.BidiagonalForwardOperator( + I, -I[1:] - yJ[1:-1] * dt[1:].unsqueeze(-1).unsqueeze(-1) + ) - def _step(self, t0, t1, dt, y0): + def update_adjoint(self, dt, J, a_prev, grads): """ - Actual "take a step" method - - Return the Jacobian even if it isn't used for the adjoint method + Update the adjoint for a block of time Args: - t0 (torch.tensor): previous time - t1 (torch.tensor): next time - dt (torch.tensor): time increment - y0 (torch.tensor): previous state + dt (torch.tensor): block of time steps + J (torch.tensor): block of Jacobians + a_prev (torch.tensor): previous adjoint + grads (torch.tensor): block of gradient values Returns: - torch.tensor: state at time :code:`t1` + adjoint_block (torch.tensor): block of updated adjoint values """ - v, J = self.func(t0, y0) - return y0 + v * dt[:, None], J + ntime = J.shape[0] - 1 - def _adjoint_update(self, dt, J, llast): - """ - Update the *adjoint problem* to the next step - - The confusing thing here is that we must use the :math:`t_{n+1}` - Jacobian as that's what we did on the forward pass, even though that's - sort of backwards for the actual backward Euler method + adjoint_block = torch.zeros(J.shape[:-1], dtype=J.dtype, device=J.device) + adjoint_block[0] = a_prev - Args: - dt (torch.tensor): time step (defined positive!) - J (torch.tensor): cached Jacobian value -- - actually :math:`\\frac{d\\dot{y}}{dy}` for this - method - llast (torch.tensor): last value of the adjoint + # This has to be done sequentially... 
+ for i in range(ntime): + adjoint_block[i + 1] = adjoint_block[i] - torch.bmm( + J[i + 1].transpose(-1, -2), adjoint_block[i].unsqueeze(-1) + ).squeeze(-1) * dt[i].unsqueeze(-1) + adjoint_block[i + 1] += grads[i + 1] - Returns: - torch.tensor: updated value of the adjoint at the "last" time step - """ - return ( - llast - + torch.bmm(llast.view(self.batch_size, 1, self.prob_size), J)[:, 0, ...] - * dt[..., None] - ) + return adjoint_block - def _accumulate(self, grad_results, partial): + def accumulate(self, prev, time, y, a, grad, grad_fn): """ - Update the *parameter gradient* to the next step + Calculate the accumulated value given the results at each time step Args: - grad_results (torch.tensor): previous value of the gradient results - partial (torch.tensor): value of the parameter partials * dt + prev (tuple of tensors): previous results + times (tensor): (ntime+1, nbatch) tensor of times + y (tensor): (ntime+1, nbatch, nsize) tensor of state + a (tensor): (ntime+1, nbatch, nsize) tensor of adjoints + grad (tensor): (ntime+1, nbatch, nsize) tensor of gradient values + grad_fn (function): function that takes t,y,a and returns the dot product with + the model parameters Returns: - tuple of torch.tensor: updated parameter gradients at the "next" step + next (tuple of tensor): updated gradients """ - return tuple(gi + pi for gi, pi in zip(grad_results, partial)) + dt = time.diff(dim=0) + g = grad_fn(time[1:], y[1:], a[:-1] * -dt.unsqueeze(-1)) + return tuple(pi + gi for pi, gi in zip(prev, g)) -class ImplicitSolver(FixedGridSolver): +class FixedGridBlockSolver: """ - Superclass of all implicit solvers + Parent class of solvers which operate on a fixed grid (i.e. non-adaptive methods) + + Args: + func (function): function returning the time rate of change and the jacobian + y0 (torch.tensor): initial condition Keyword Args: - iterative_linear_solver (bool): if true, solve with an iterative linear scheme - rtol (float): solver relative tolerance - atol (float): solver absolute tolerance - miter (int): maximum nonlinear iterations - solver_method (string): how to solve linear equations, `"diag"` or `"lu"` - `"diag"` uses just the diagonal of the - Jacobian -- the dumbest Newton-Krylov scheme ever. 
- `"lu"` uses the full LU factorization, ignored - if using an iterative linear solver + scheme (TimeIntegrationScheme): time integration scheme, default is + backward euler + block_size (int): target block size + rtol (float): relative tolerance for Newton's method + atol (float): absolute tolerance for Newton's method + miter (int): maximum number of Newton iterations + sparse_linear_solver (str): method to solve batched sparse Ax = b, options + are currently "direct" or "dense" + adjoint_params: parameters to track for the adjoint backward pass + guess_type (string): strategy for initial guess, options are "zero" and "previous" """ def __init__( self, - *args, - iterative_linear_solver=False, + func, + y0, + scheme=BackwardEulerScheme(), + block_size=1, rtol=1.0e-6, - atol=1.0e-10, + atol=1.0e-4, miter=100, - solver_method="lu", - **kwargs + linear_solve_method="direct", + adjoint_params=None, + guess_type="zero", + **kwargs, ): - super().__init__(*args, **kwargs) - - if not self.has_jac: - raise ValueError( - "Implicit solvers can only be used if the model returns the jacobian" - ) + # Store basic info about the system + self.func = func + self.y0 = y0 - self.ils = iterative_linear_solver + # Time integration scheme + self.scheme = scheme - if not self.ils: - self.solver_method = solver_method - else: - self.solver = solvers.PreconditionerReuseNonlinearSolver() + # Size information + self.batch_size = self.y0.shape[0] + self.prob_size = self.y0.shape[1] + self.n = block_size + # Solver params self.rtol = rtol self.atol = atol self.miter = miter - self.use_curr = False # Which location it needs the partial via autodiff - - def _adjoint_update(self, dt, J, llast): - """ - Update the *adjoint problem* to the next step + # Store for later + self.adjoint_params = adjoint_params - The confusing thing here is that we must use the :math:`t_{n+1}` - Jacobian as that's what we did on the forward pass, even though that's - sort of backwards for the actual backward Euler method + # Setup the linear solver context + self.linear_solve_context = chunktime.ChunkTimeOperatorSolverContext( + linear_solve_method, **kwargs + ) - Args: - dt (torch.tensor): time step (defined positive!) - J (torch.tensor): cached Jacobian value -- - actually :math:`\\frac{d\\dot{y}}{dy}` for this - method - llast (torch.tensor): last value of the adjoint + # Initial guess for integration + self.guess_type = guess_type - Returns: - torch.tensor: updated value of the adjoint at the "last" time step - """ - raise NotImplementedError("_adjoint_update not implemented in base class!") + # Cached solutions + self.t = None + self.result = None - def _accumulate(self, grad_results, partial): + def integrate(self, t, cache_adjoint=False): """ - Update the *parameter gradient* to the next step + Main method: actually integrate through the times :code:`t`. 
Args: - grad_results (torch.tensor): previous value of the gradient results - partial (torch.tensor): value of the parameter partials * dt + t (torch.tensor): timesteps to report results at + n (int): target time block size + + Keyword Args: + cache_adjoint (bool): store the info we'll need for the + adjoint pass, default `False` Returns: - tuple of torch.tensor: updated parameter gradients at the "next" step + torch.tensor: integrated results at times :code:`t` """ - raise NotImplementedError("_accumulated not implemented in base class!") + result = torch.empty( + t.shape[0], *self.y0.shape, dtype=self.y0.dtype, device=self.y0.device + ) + result[0] = self.y0 - def _solve_system(self, system, guess): - """ - Dispatch to solve the nonlinear system of equations + for k in range(1, t.shape[0], self.n): + result[k : k + self.n] = self.block_update( + t[k : k + self.n], + t[k - 1], + result[k - 1], + self.func, + self._initial_guess(result, k), + ) - Args: - system (function): function return R and J - guess (torch.tensor): initial guess at solution + # Store for the backward pass, if we're going to do that + if cache_adjoint: + self.t = t.flip(0) + self.result = result.flip(0) - Returns: - (torch.tensor, torch.tensor): the solution and Jacobian evaluated at the solution + return result + + def _initial_guess(self, result, k): """ - if not self.ils: - return solvers.newton_raphson( - system, - guess, - linsolver=self.solver_method, - rtol=self.rtol, - atol=self.atol, - miter=self.miter, - ) - return self.solver.solve( - system, guess, rtol=self.rtol, atol=self.atol, miter=self.miter - ) + Form the initial guess + Args: + result (torch.tensor): currently-populated results + k (int): current time step + """ + if self.guess_type == "zero": + guess = torch.zeros_like(result[k : k + self.n]) + elif self.guess_type == "previous": + if k - self.n - 1 < 0: + guess = torch.zeros_like(result[k : k + self.n]) + else: + guess = result[(k - self.n) : k] - result[k - self.n - 1].unsqueeze(0) + blk = self.n - result[k : k + self.n].shape[0] + guess = guess[blk:] + else: + raise ValueError(f"Unknown initial guess strategy {self.guess_type}!") -class BackwardEuler(ImplicitSolver): - """ - The classical backward Euler integration method - """ + return guess.transpose(0, 1).flatten(start_dim=1) - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def _get_param_partial(self, t, y, a): + """ + Get the parameter partial at each time in the block - def _step(self, t0, t1, dt, y0): + Args: + t (torch.tensor): (nstep+1,nbatch) tensor of times + y (torch.tensor): (nstep+1,nbatch,nsize) tensor of state + a (torch.tensor): (nstep+1,nbatch,nsize) adjoint tensor """ - Actual implementation of the time integration + with torch.enable_grad(): + ydot = self.func(t, y)[0] + return torch.autograd.grad(ydot, self.adjoint_params, a) - Return the final Jacobian, even though it's not used, - for later use in the adjoint method + def rewind(self, output_grad): + """ + Rewind the adjoint to provide the dot product with each output_grad Args: - t0 (torch.tensor): previous time - t1 (torch.tensor): next time - dt (torch.tensor): time increment - y0 (torch.tensor): previous state - - Returns: - (torch.tensor, torch.tensor): next state and Jacobian + identity at that state + output_grad (torch.tensor): dot product tensor """ - # Function which returns the residual and jacobian - def RJ(x): - f, df = self.func(t1, x) - return x - y0 - f * dt[..., None], utility.add_id(-df * dt[..., None, None]) + # Setup results 
gradients + grad_result = tuple( + torch.zeros(p.shape, device=output_grad.device) for p in self.adjoint_params + ) - return self._solve_system(RJ, y0.clone()) + # Flip the output_grad + output_grad = output_grad.flip(0) - def _adjoint_update(self, dt, J, llast): - """ - Update the *adjoint problem* to the next step + # Calculate starts at least gradient + prev_adjoint = output_grad[0] - The confusing thing here is that we must use the :math:`t_{n+1}` - Jacobian as that's what we did on the forward pass, even though - that's sort of backwards for the actual implicit Euler method + for k in range(1, self.t.shape[0], self.n): + # Could also cache these of course + _, J = self.func( + self.t[k - 1 : k + self.n], self.result[k - 1 : k + self.n] + ) - Args: - dt (torch.tensor): time step (defined positive!) - Jlast (torch.tensor): cached Jacobian value -- :math:`I - \\frac{d\\dot{y}}{dy}` - for this method - llast (torch.tensor): last value of the adjoint + full_adjoint = self.scheme.update_adjoint( + self.t[k - 1 : k + self.n].diff(dim=0), + J, + prev_adjoint, + output_grad[k - 1 : k + self.n], + ) - Returns: - torch.tensor: updated adjoint problem - """ - return torch.linalg.solve(J.transpose(1, 2), llast) + # Ugh, best way I can think to do this is to combine everything... + grad_result = self.scheme.accumulate( + grad_result, + self.t[k - 1 : k + self.n], + self.result[k - 1 : k + self.n], + full_adjoint, + output_grad[k - 1 : k + self.n], + self._get_param_partial, + ) + + # Update previous adjoint + prev_adjoint = full_adjoint[-1] - def _accumulate(self, grad_results, partial): + return grad_result + + def block_update(self, t, t_start, y_start, func, y_guess): """ - Update the *parameter gradient* to the next step using the consistent - integration method + Solve a block of times all at once with backward Euler Args: - grad_results (torch.tensor): previous value of the gradient results - partial (torch.tensor): value of the parameter partials * dt + t (torch.tensor): block of times + t_start (torch.tensor): time at start of block + y_start (torch.tensor): start of block + func (torch.nn.Module): function to use + """ + # Various useful sizes + n = t.shape[0] # Number of time steps to do at once + + def RJ(dy): + # Make things into a more rational shape + dy = dy.reshape(self.batch_size, n, self.prob_size).transpose(0, 1) + # Add a zero to the first dimension of dy + dy = torch.vstack((torch.zeros_like(y_start).unsqueeze(0), dy)) + # Get actual values of state + y = dy + y_start.unsqueeze(0).expand(n + 1, -1, -1) + + # Calculate the time steps + times = torch.vstack((t_start.unsqueeze(0), t)) + dt = times.diff(dim=0) + + # Batch update the rate and jacobian + yd, yJ = func(times, y) + + return self.scheme.form_operators(dy, yd, yJ, dt) + + dy = chunktime.newton_raphson_chunk( + RJ, + y_guess, + self.linear_solve_context, + rtol=self.rtol, + atol=self.atol, + miter=self.miter, + ) - Returns: - tuple of torch.tensors: updated parameter gradients - """ - return tuple(gi + pi for gi, pi in zip(grad_results, partial)) + return dy.reshape(self.batch_size, n, self.prob_size).transpose( + 0, 1 + ) + y_start.unsqueeze(0).expand(n, -1, -1) # Available solver methods mapped to the objects -methods = {"forward-euler": ForwardEuler, "backward-euler": BackwardEuler} +int_methods = { + "backward-euler": BackwardEulerScheme(), + "forward-euler": ForwardEulerScheme(), +} def odeint(func, y0, times, method="backward-euler", extra_params=None, **kwargs): @@ -657,7 +497,7 @@ def odeint(func, y0, times, 
method="backward-euler", extra_params=None, **kwargs kwargs: keyword arguments passed on to specific solver methods """ - solver = methods[method](func, y0, **kwargs) + solver = FixedGridBlockSolver(func, y0, scheme=int_methods[method], **kwargs) return solver.integrate(times) @@ -682,7 +522,6 @@ def forward(ctx, solver, times, *params): y = solver.integrate(times, cache_adjoint=True) # Save the info we will need for the backward pass ctx.solver = solver - ctx.save_for_backward(times) return y @staticmethod @@ -693,8 +532,7 @@ def backward(ctx, output_grad): output_grad: grads with which to dot product """ with torch.no_grad(): - times = ctx.saved_tensors[0] - grad_tuple = ctx.solver.rewind_adjoint(times, output_grad) + grad_tuple = ctx.solver.rewind(output_grad) return (None, None, *grad_tuple) @@ -731,11 +569,14 @@ def odeint_adjoint( kwargs: keyword arguments passed on to specific solver methods """ + # Grab parameters for backward if extra_params is None: extra_params = [] adjoint_params = tuple(p for p in func.parameters()) + tuple(extra_params) - solver = methods[method](func, y0, adjoint_params=adjoint_params, **kwargs) + solver = FixedGridBlockSolver( + func, y0, scheme=int_methods[method], adjoint_params=adjoint_params, **kwargs + ) wrapper = IntegrateWithAdjoint() diff --git a/pyoptmat/temperature.py b/pyoptmat/temperature.py index f3b6439..52ddc3f 100644 --- a/pyoptmat/temperature.py +++ b/pyoptmat/temperature.py @@ -456,77 +456,30 @@ def shape(self): return self.B.shape -class KMViscosityScalingGC(TemperatureParameter): +class PolynomialScaling(TemperatureParameter): """ - Parameter that varies as - - .. math:: - - \\exp{B} \\mu \\dot{\\varepsilon}_0^{-1/n} - - where :math:`B = C - g_0 A` is the Kocks-Mecking intercept parameter - and the rest are defined in the - :py:class:`pyoptmat.temperature.KMRateSensitivityScaling` object. 
- - :math:`n` is the rate sensitivity, again given by the - :py:class:`pyoptmat.temperature.KMRateSensitivityScaling` object + Mimics np.polyval using Horner's method to evaluate a polynomial Args: - A (torch.tensor): Kocks-Mecking slope parameter - C (torch.tensor): Kocks-Mecking cutoff parameter - g0 (torch.tensor): Kocks-Mecking critical energy - mu (|TP|): scalar, temperature-dependent shear modulus - eps0 (torch.tensor): scalar, reference strain rate - b (torch.tensor): scalar, Burger's vector - k (torch.tensor): scalar, Boltzmann constant + coefs (torch.tensor): polynomial coefficients in the numpy convention + (highest order 1st) Keyword Args: - A_scale (function): numerical scaling function for A, defaults to - no scaling - C_scale (function): numerical scaling function for C, defaults to - no scaling - g0_scale (function): numerical scaling function g0, defaults to no - scaling + coef_scale_fn (function): numerical scaling function for coefs, + defaults to no scaling """ - def __init__( - self, - A, - C, - g0, - mu, - eps0, - b, - k, - *args, - A_scale=lambda x: x, - C_scale=lambda x: x, - g0_scale=lambda x: x, - **kwargs - ): + def __init__(self, coefs, *args, coef_scale_fn=lambda x: x, **kwargs): super().__init__(*args, **kwargs) - self.A = A - self.C = C - self.g0 = g0 - self.mu = mu - self.eps0 = eps0 - self.b = b - self.k = k - - self.A_scale = A_scale - self.C_scale = C_scale - self.g0_scale = g0_scale - - self.n = KMRateSensitivityScaling( - self.A, self.mu, self.b, self.k, A_scale=self.A_scale, cutoff=1000 - ) + self.coefs = coefs + self.scale_fn = coef_scale_fn @property def device(self): """ Return the device used by the scaling function """ - return self.A.device + return self.coefs.device def value(self, T): """ @@ -538,92 +491,51 @@ def value(self, T): Returns: torch.tensor: value at the given temperatures """ - n = self.n(T) - B = self.C_scale(self.C) - self.g0_scale(self.g0) * self.A_scale(self.A) - return torch.exp(B) * self.mu(T) * self.eps0 ** (-1.0 / n) + acoefs = self.scale_fn(self.coefs) + + res = torch.zeros_like(T) + acoefs[0] + for c in acoefs[1:]: + res *= T + res += c + + return res @property def shape(self): """ Shape of the underlying parameter """ - return self.B.shape + return self.coefs[0].shape -class KMViscosityScalingCutoff(TemperatureParameter): +class PiecewiseScaling(TemperatureParameter): """ - Parameter that varies as - - .. math:: - - \\exp{B} \\mu \\dot{\\varepsilon}_0^{-1/n} - - where :math:`B` is the Kocks-Mecking intercept parameter and the - rest are defined in the :py:class:`pyoptmat.temperature.KMRateSensitivityScaling` object. 
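The Horner evaluation above is intended to reproduce np.polyval; a quick sketch with arbitrary illustrative coefficients, assuming the class is importable from pyoptmat.temperature as patched here:

import numpy as np
import torch
from pyoptmat import temperature

torch.set_default_dtype(torch.float64)

coefs = torch.tensor([2.0, -1.0, 0.5])  # 2 T**2 - T + 0.5, highest order first
p = temperature.PolynomialScaling(coefs)

T = torch.linspace(300.0, 1000.0, 5)
assert np.allclose(p.value(T).numpy(), np.polyval(coefs.numpy(), T.numpy()))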
- - This variant cuts off the viscosity in the high rate/low temperature - regime to limit it by :math:`C * \\mu` - - :math:`n` is the rate sensitivity, again given by the - :py:class:`pyoptmat.temperature.KMRateSensitivityScaling` object + Piecewise linear interpolation, mimics scipy.interp1d with + default options Args: - A (torch.tensor): Kocks-Mecking slope parameter - B (torch.tensor): Kocks-Mecking intercept parameter, sets shape, - must be same shape as A - C (torch.tensor): High rate/low temperature cutff - mu (|TP|): scalar, temperature-dependent shear modulus - eps0 (torch.tensor): scalar, reference strain rate - b (torch.tensor): scalar, Burger's vector - k (torch.tensor): scalar, Boltzmann constant + control (torch.tensor): temperature control points (npoints) + values (torch.tensor): values at control points (optional_batch, npoints) Keyword Args: - A_scale: numerical scaling function for A, defaults to - no scaling - B_scale: numerical scaling function for B, defaults to - no scaling - C_scale: numerical scaling function for C, defaults to - no scaling + values_scale_fn (function): numerical scaling function for values, + defaults to no scaling """ - def __init__( - self, - A, - B, - C, - mu, - eps0, - b, - k, - *args, - A_scale=lambda x: x, - B_scale=lambda x: x, - C_scale=lambda x: x, - **kwargs - ): + def __init__(self, control, values, *args, values_scale_fn=lambda x: x, **kwargs): super().__init__(*args, **kwargs) - self.A = A - self.B = B - self.C = C - self.mu = mu - self.eps0 = eps0 - self.b = b - self.k = k - self.A_scale = A_scale - self.B_scale = B_scale - self.C_scale = C_scale + self.control = control + self.values = values - self.n = KMRateSensitivityScaling( - self.A, self.mu, self.b, self.k, A_scale=self.A_scale, cutoff=1000 - ) + self.values_scale_fn = values_scale_fn @property def device(self): """ Return the device used by the scaling function """ - return self.A.device + return self.values.device def value(self, T): """ @@ -635,103 +547,68 @@ def value(self, T): Returns: torch.tensor: value at the given temperatures """ - n = self.n(T) - return torch.minimum( - torch.exp(self.B_scale(self.B)) * self.mu(T) * self.eps0 ** (-1.0 / n), - torch.exp(self.C_scale(self.C)) * self.mu(T), - ) - - @property - def shape(self): - """ - Shape of the underlying parameter - """ - return self.B.shape - - -class PolynomialScaling(TemperatureParameter): - """ - Mimics np.polyval using Horner's method to evaluate a polynomial - - Args: - coefs (torch.tensor): polynomial coefficients in the numpy convention - (highest order 1st) - - Keyword Args: - coef_scale_fn (function): numerical scaling function for coefs, - defaults to no scaling - """ - - def __init__(self, coefs, *args, coef_scale_fn=lambda x: x, **kwargs): - super().__init__(*args, **kwargs) - self.coefs = coefs - self.scale_fn = coef_scale_fn + vcurr = self.values_scale_fn(self.values) - @property - def device(self): - """ - Return the device used by the scaling function - """ - return self.coefs.device + slopes = (vcurr[..., 1:] - vcurr[..., :-1]) / ( + self.control[..., 1:] - self.control[..., :-1] + ) - def value(self, T): - """ - Return the function value + offsets = T.unsqueeze(-1) - self.control[..., :-1] - Args: - T (torch.tensor): current temperature + poss = (slopes[None, ...] 
* offsets + vcurr[None, ..., :-1]).reshape( + offsets.shape + ) - Returns: - torch.tensor: value at the given temperatures - """ - acoefs = self.scale_fn(self.coefs) + locs = torch.logical_and( + T <= self.control[1:][(...,) + (None,) * T.dim()], + T > self.control[:-1][(...,) + (None,) * T.dim()], + ) - res = torch.zeros_like(T) + acoefs[0] - for c in acoefs[1:]: - res *= T - res += c + # Problem is now that we can have stuff right on control[0]... + locs[0] = torch.logical_or( + locs[0], T == self.control[0][(...,) + (None,) * T.dim()] + ) - return res + return poss[locs.movedim(0, -1)].reshape(T.shape) @property def shape(self): """ Shape of the underlying parameter """ - return self.coefs[0].shape + return self.values.shape[1:] -class PiecewiseScaling(TemperatureParameter): +class ArrheniusScaling(TemperatureParameter): """ - Piecewise linear interpolation, mimics scipy.interp1d with - default options + Simple Arrhenius scaling of the type + + .. math:: + + A \\exp(-Q/T) Args: - control (torch.tensor): temperature control points (npoints,) - values (torch.tensor): values at control points (npoints, ) + - param.shape + A (torch.tensor): Prefactor + Q (torch.tensor): Activation energy (times R) - Keyword Args: - values_scale_fn (function): numerical scaling function for values, - defaults to no scaling + Keyword Args + A_scale (function): numerical scaling for A, defaults to no scaling + Q_scale (function): numerical scaling for Q, defaults to no scaling """ - def __init__(self, control, values, *args, values_scale_fn=lambda x: x, **kwargs): + def __init__(self, A, Q, *args, A_scale=lambda x: x, Q_scale=lambda x: x, **kwargs): super().__init__(*args, **kwargs) - - self.control = control - self.values = values - - self.values_scale_fn = values_scale_fn - - self.batch = values.dim() > 1 + self.A = A + self.Q = Q + self.A_scale = A_scale + self.Q_scale = Q_scale @property def device(self): """ Return the device used by the scaling function """ - return self.values.device + return self.A.device def value(self, T): """ @@ -743,40 +620,23 @@ def value(self, T): Returns: torch.tensor: value at the given temperatures """ - gi = ( - torch.remainder( - torch.sum((self.control[:, None] - T) <= 0, dim=0), - self.control.shape[0], - ) - - 1 - ) - - vcurr = self.values_scale_fn(self.values) - slopes = torch.diff(vcurr, dim=-1) / torch.diff(self.control, dim=0) - - if self.batch: - return torch.gather(vcurr, -1, gi[:, None])[:, 0] + torch.gather( - slopes, -1, gi[:, None] - )[:, 0] * (T - self.control[gi]) - return torch.gather(vcurr, -1, gi) + torch.gather(slopes, -1, gi) * ( - T - self.control[gi] - ) + return self.A_scale(self.A) * torch.exp(-self.Q_scale(self.Q) / T) @property def shape(self): """ Shape of the underlying parameter """ - return self.values.shape[1:] + return self.A.shape -class ArrheniusScaling(TemperatureParameter): +class InverseArrheniusScaling(TemperatureParameter): """ Simple Arrhenius scaling of the type .. math:: - A \\exp(-Q/T) + A (1 - \\exp(-Q/T)) Args: A (torch.tensor): Prefactor @@ -811,7 +671,7 @@ def value(self, T): Returns: torch.tensor: value at the given temperatures """ - return self.A_scale(self.A) * torch.exp(-self.Q_scale(self.Q) / T) + return self.A_scale(self.A) * (1.0 - torch.exp(-self.Q_scale(self.Q) / T)) @property def shape(self): diff --git a/pyoptmat/utility.py b/pyoptmat/utility.py index 5d3bd22..2e57e0c 100644 --- a/pyoptmat/utility.py +++ b/pyoptmat/utility.py @@ -4,12 +4,64 @@ visualization routines. 
""" +from functools import reduce, wraps + import numpy as np import matplotlib.pyplot as plt import torch from torch import nn +import functorch + + +def compose(f1, f2): + """A function composition operator... + + Args: + f1 (function): first function + f2 (function): second function + + Returns: + function: f2(f1) + """ + return f2(f1) + + +def jacobianize(argnums=None): + """Decorator that adds the multibatched Jacobian to a function + + Assumes that the function itself has only a single dimension, the last in the shape + + By default provides Jacobian for all *args, but argnums can be set to limit this + to certain arguments + """ + + def inner_jacobianize(fn): + @wraps(fn) + def wrapper(*args, argnums=argnums, **kwargs): + if argnums is None: + argnums = tuple(range(len(args))) + res = fn(*args, **kwargs) + repeats = res.dim() - 1 + D = reduce( + compose, + [fn, lambda x: functorch.jacrev(x, argnums=argnums)] + + [functorch.vmap] * repeats, + ) + + return res, D(*args, **kwargs) + + return wrapper + + return inner_jacobianize + + +def mbmm(A1, A2): + """ + Batched matrix-matrix multiplication with several batch dimensions + """ + return torch.einsum("...ik,...kj->...ij", A1, A2) def visualize_variance(strain, stress_true, stress_calc, alpha=0.05): @@ -58,6 +110,69 @@ def visualize_variance(strain, stress_true, stress_calc, alpha=0.05): plt.show() +def batch_differentiate(fn, x0, eps=1.0e-6, nbatch_dim=1): + """ + New numerical differentiation function to handle the batched-model cases + + This version handles arbitrary batch sizes + + Args: + fn (torch.tensor): function to differentiate via finite differences + x0 (torch.tensor): point at which to take the numerical derivative + + Keyword Args: + eps (float): perturbation to use + + Returns: + torch.tensor: finite difference approximation to + :math:`\\frac{df}{dx}|_{x_0}` + """ + v0 = fn(x0) + bs = v0.shape[:nbatch_dim] + + s1 = v0.shape[nbatch_dim:] + s2 = x0.shape[nbatch_dim:] + squeeze1 = False + squeeze2 = False + + if len(s1) == 0: + s1 = (1,) + squeeze1 = True + + if len(s2) == 0: + s2 = (1,) + squeeze2 = True + + d = torch.empty(bs + s1 + s2, device=x0.device) + + x0 = x0.reshape(bs + s2) + v0 = v0.reshape(bs + s1) + + for i2 in np.ndindex(s2): + dx = torch.zeros_like(x0) + inc = torch.abs(x0[..., i2]) * eps + inc[inc < eps] = eps + dx[..., i2] = inc + + x = x0 + dx + if squeeze2: + x = x.squeeze(-1) + + v1 = fn(x).reshape(bs + s1) + d[..., i2] = (v1 - v0).unsqueeze(-1).expand(d[..., i2].shape) / inc.unsqueeze( + -2 + ).expand(d[..., i2].shape) + + if squeeze1 and squeeze2: + d = d.squeeze(-1).squeeze(-1) + elif squeeze2: + d = d.squeeze(-1) + elif squeeze1: + d = d.squeeze(-len(s2) - 1) + + return d + + def new_differentiate(fn, x0, eps=1.0e-6): """ New numerical differentiation function to handle the batched-model cases @@ -89,7 +204,7 @@ def new_differentiate(fn, x0, eps=1.0e-6): flatten = False fs2 = (nbatch,) + s2 - d = torch.empty((nbatch,) + s1 + s2) + d = torch.empty((nbatch,) + s1 + s2, device=x0.device) v0 = v0.reshape(fs1) @@ -161,6 +276,72 @@ def differentiate(fn, x0, eps=1.0e-6): return d +class ArbitraryBatchTimeSeriesInterpolator(nn.Module): + """ + Interpolate :code:`data` located at discrete :code:`times` + linearly to point :code:`t`. + + This version handles batched of arbitrary size -- only the rightmost + batch dimension must agree with the input data. All other dimensions are + broadcast. 
+ + Args: + times (torch.tensor): input time series as a code:`(ntime,nbatch)` + array + values (torch.tensor): input values series as a code:`(ntime,nbatch)` + array + """ + + def __init__(self, times, data): + super().__init__() + # Transpose to put the shared dimension first + self.times = times.t() + self.values = data.t() + + self.slopes = torch.diff(self.values, dim=-1) / torch.diff(self.times, dim=-1) + + def forward(self, t): + """ + Calculate the linearly-interpolated current values + + Args: + t (torch.tensor): batched times as :code:`(...,nbatch,)` array + + Returns: + torch.tensor: batched values at :code:`t` + """ + squeeze = t.dim() == 1 + + t = t.t() # Transpose so the common dimension is first + if squeeze: + t = t.unsqueeze(-1) + + # Which dimensions to offset -- likely there is some efficiency + # to be gained by flipping these, but probably depends on the + # input shape + d1 = -1 + d2 = -2 + + offsets = t.unsqueeze(d1) - self.times[..., :-1].unsqueeze(d2) + + poss = self.slopes.unsqueeze(d2) * offsets + self.values[..., :-1].unsqueeze(d2) + + locs = torch.logical_and( + t.unsqueeze(d1) <= self.times[..., 1:].unsqueeze(d2), + t.unsqueeze(d1) > self.times[..., :-1].unsqueeze(d2), + ) + + # Now we need to fix up the stupid left side + locs[..., 0] = torch.logical_or( + locs[..., 0], t == self.times[..., 0].unsqueeze(-1) + ) + + if squeeze: + return poss[locs].squeeze(-1).t() + + return poss[locs].reshape(t.shape).t() + + class BatchTimeSeriesInterpolator(nn.Module): """ Interpolate :code:`data` located at discrete :code:`times` diff --git a/requirements.txt b/requirements.txt index 17fbf74..eba1a1a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ click==8.0.4 black==22.1.0 nose2 pylint==2.15.5 +functorch diff --git a/setup.py b/setup.py index a31ac2a..1fb9b65 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ # Name of the project name = 'pyoptmat', # Version - version = '1.1.3', + version = '1.2.0', # One line-description description = "Statistical inference for material models", # README diff --git a/test/test_adjoint_gradients.py b/test/test_adjoint_gradients.py index c9ff617..34a6096 100644 --- a/test/test_adjoint_gradients.py +++ b/test/test_adjoint_gradients.py @@ -102,14 +102,14 @@ def compare(self, **params): def test_explicit(self): self.compare(method="forward-euler") - def test_explicit_substep(self): - self.compare(method="forward-euler", substeps=2) - def test_implicit(self): self.compare(method="backward-euler") - def test_implicit_substep(self): - self.compare(method="backward-euler", substeps=2) + def test_explicit_block(self): + self.compare(method="forward-euler", block_size=5) + + def test_implicit_block(self): + self.compare(method="backward-euler", block_size=5) class TestFullerModelStrain(unittest.TestCase): @@ -203,14 +203,14 @@ def compare(self, **params): def test_explicit(self): self.compare(method="forward-euler") - def test_explicit_substep(self): - self.compare(method="forward-euler", substeps=2) - def test_implicit(self): self.compare(method="backward-euler") - def test_implicit_substep(self): - self.compare(method="backward-euler", substeps=2) + def test_explicit_block(self): + self.compare(method="forward-euler", block_size=5) + + def test_implicit_block(self): + self.compare(method="backward-euler", block_size=5) class TestFullModelStress(unittest.TestCase): @@ -305,14 +305,14 @@ def compare(self, **params): def test_explicit(self): self.compare(method="forward-euler") - def test_explicit_substep(self): - 
self.compare(method="forward-euler", substeps=2) - def test_implicit(self): self.compare(method="backward-euler") - def test_implicit_substep(self): - self.compare(method="backward-euler", substeps=2) + def test_explicit_block(self): + self.compare(method="forward-euler", block_size=5) + + def test_implicit_block(self): + self.compare(method="backward-euler", block_size=5) class TestFullerModelStress(unittest.TestCase): @@ -406,11 +406,11 @@ def compare(self, **params): def test_explicit(self): self.compare(method="forward-euler") - def test_explicit_substep(self): - self.compare(method="forward-euler", substeps=2) - def test_implicit(self): self.compare(method="backward-euler") - def test_implicit_substep(self): - self.compare(method="backward-euler", substeps=2) + def test_explicit_block(self): + self.compare(method="forward-euler", block_size=5) + + def test_implicit_block(self): + self.compare(method="backward-euler", block_size=5) diff --git a/test/test_chunktime.py b/test/test_chunktime.py new file mode 100644 index 0000000..cd063fa --- /dev/null +++ b/test/test_chunktime.py @@ -0,0 +1,74 @@ +from pyoptmat import chunktime + +import torch + +import unittest +import warnings + +warnings.filterwarnings("ignore") + +torch.set_default_tensor_type(torch.DoubleTensor) + + +class TestBackwardEulerChunkTimeOperator(unittest.TestCase): + def setUp(self): + self.sblk = 6 + self.nblk = 4 + self.sbat = 2 + + self.blk_A = torch.rand(self.nblk, self.sbat, self.sblk, self.sblk) + self.blk_B = ( + torch.rand(self.nblk - 1, self.sbat, self.sblk, self.sblk) / 10 + ) # Diagonal dominance + + self.A = chunktime.BidiagonalForwardOperator(self.blk_A, self.blk_B) + self.b = torch.rand(self.sbat, self.nblk * self.sblk) + + def test_inv_mat_vec_thomas(self): + M = chunktime.BidiagonalThomasFactorization(self.blk_A, self.blk_B) + one = torch.linalg.solve(self.A.to_diag().to_dense(), self.b) + two = M(self.b) + + self.assertTrue(torch.allclose(one, two)) + + def test_mat_vec(self): + one = self.A.to_diag().to_dense().matmul(self.b.unsqueeze(-1)).squeeze(-1) + two = self.A(self.b) + + self.assertTrue(torch.allclose(one, two)) + + +class TestBasicSparseSetup(unittest.TestCase): + def setUp(self): + self.sblk = 4 + self.nblk = 3 + self.sbatch = 4 + + self.u = torch.zeros(self.nblk - 2, self.sbatch, self.sblk, self.sblk) + self.d = torch.zeros(self.nblk, self.sbatch, self.sblk, self.sblk) + self.l = torch.zeros(self.nblk - 1, self.sbatch, self.sblk, self.sblk) + + for i in range(self.nblk - 2): + self.u[i] = (i + 2) * 1.0 + for i in range(self.nblk): + self.d[i] = 2.0 * i - 1.0 + for i in range(self.nblk - 1): + self.l[i] = -(i + 1) * 1.0 + + self.sp = chunktime.SquareBatchedBlockDiagonalMatrix( + [self.d, self.l, self.u], [0, -1, 2] + ) + + def test_coo(self): + coo = self.sp.to_batched_coo() + d = coo.to_dense().movedim(-1, 0) + od = self.sp.to_dense() + + self.assertTrue(torch.allclose(d, od)) + + def test_csr(self): + csr_list = self.sp.to_unrolled_csr() + od = self.sp.to_dense() + + for i in range(self.sbatch): + self.assertTrue(torch.allclose(csr_list[i].to_dense(), od[i])) diff --git a/test/test_damage.py b/test/test_damage.py index fdf6b5a..3fa6193 100644 --- a/test/test_damage.py +++ b/test/test_damage.py @@ -13,34 +13,43 @@ class DamageBase: def test_d_rate(self): exact = self.model.damage_rate(self.s, self.d, self.t, self.T, self.erate)[1] - numer = utility.differentiate( + numer = utility.batch_differentiate( lambda x: self.model.damage_rate(self.s, x, self.t, self.T, self.erate)[0], self.d, + 
nbatch_dim=self.bdim, ) self.assertTrue(np.allclose(exact, numer)) def test_d_stress(self): exact = self.model.d_damage_rate_d_s(self.s, self.d, self.t, self.T, self.erate) - numer = utility.differentiate( + numer = utility.batch_differentiate( lambda x: self.model.damage_rate(x, self.d, self.t, self.T, self.erate)[0], self.s, + nbatch_dim=self.bdim, ) self.assertTrue(np.allclose(exact, numer)) def test_d_erate(self): exact = self.model.d_damage_rate_d_e(self.s, self.d, self.t, self.T, self.erate) - numer = utility.differentiate( + numer = utility.batch_differentiate( lambda x: self.model.damage_rate(self.s, self.d, self.t, self.T, x)[0], self.erate, + nbatch_dim=self.bdim, ) self.assertTrue(np.allclose(exact, numer)) +def batch_transform(tensor, extra_batch): + os = tensor.shape + return tensor.unsqueeze(0).expand((extra_batch,) + os) + + class TestNoDamage(unittest.TestCase, DamageBase): def setUp(self): self.model = damage.NoDamage() self.nbatch = 10 + self.bdim = 1 self.s = torch.linspace(90, 100, self.nbatch) self.d = torch.linspace(0.1, 0.5, self.nbatch) @@ -57,6 +66,33 @@ def test_damage_rate(self): ) +class TestNoDamageArbitraryBatch(unittest.TestCase, DamageBase): + def setUp(self): + self.model = damage.NoDamage() + + self.nbatch = 10 + self.extra_batch = 6 + self.bdim = 2 + + self.s = batch_transform(torch.linspace(90, 100, self.nbatch), self.extra_batch) + self.d = batch_transform( + torch.linspace(0.1, 0.5, self.nbatch), self.extra_batch + ) + self.t = batch_transform(torch.ones(self.nbatch), self.extra_batch) + self.T = batch_transform(torch.zeros_like(self.t), self.extra_batch) + self.erate = batch_transform( + torch.linspace(1e-2, 1e-3, self.nbatch), self.extra_batch + ) + + def test_damage_rate(self): + self.assertTrue( + np.allclose( + self.model.damage_rate(self.s, self.d, self.t, self.T, self.erate)[0], + torch.zeros_like(self.s), + ) + ) + + class TestHLDamage(unittest.TestCase, DamageBase): def setUp(self): self.A = 3000.0 @@ -65,6 +101,7 @@ def setUp(self): self.model = damage.HayhurstLeckie(CP(self.A), CP(self.xi), CP(self.phi)) self.nbatch = 10 + self.bdim = 1 self.s = torch.linspace(90, 100, self.nbatch) self.d = torch.linspace(0.1, 0.5, self.nbatch) @@ -79,3 +116,33 @@ def test_damage_rate(self): (self.s / self.A) ** (self.xi) * (1 - self.d) ** (self.xi - self.phi), ) ) + + +class TestHLDamageArbitraryBatch(unittest.TestCase, DamageBase): + def setUp(self): + self.A = 3000.0 + self.xi = 6.5 + self.phi = 1.7 + self.model = damage.HayhurstLeckie(CP(self.A), CP(self.xi), CP(self.phi)) + + self.nbatch = 10 + self.extra_batch = 7 + self.bdim = 2 + + self.s = batch_transform(torch.linspace(90, 100, self.nbatch), self.extra_batch) + self.d = batch_transform( + torch.linspace(0.1, 0.5, self.nbatch), self.extra_batch + ) + self.t = batch_transform(torch.ones(self.nbatch), self.extra_batch) + self.T = batch_transform(torch.zeros_like(self.t), self.extra_batch) + self.erate = batch_transform( + torch.linspace(1e-2, 1e-3, self.nbatch), self.extra_batch + ) + + def test_damage_rate(self): + self.assertTrue( + np.allclose( + self.model.damage_rate(self.s, self.d, self.t, self.T, self.erate)[0], + (self.s / self.A) ** (self.xi) * (1 - self.d) ** (self.xi - self.phi), + ) + ) diff --git a/test/test_flowrules.py b/test/test_flowrules.py index 0ed944c..24cc9a6 100644 --- a/test/test_flowrules.py +++ b/test/test_flowrules.py @@ -13,9 +13,10 @@ class CommonFlowRule: def test_flow_rate(self): exact = self.model.flow_rate(self.s, self.h, self.t, self.T, self.erate)[1] - numer = 
utility.differentiate( + numer = utility.batch_differentiate( lambda x: self.model.flow_rate(x, self.h, self.t, self.T, self.erate)[0], self.s, + nbatch_dim=1, ) self.assertTrue(np.allclose(exact, numer, rtol=1.0e-4)) @@ -27,9 +28,10 @@ def test_history_rate(self): test, exact = self.model.history_rate( self.s, self.h, self.t, self.T, self.erate ) - numer = utility.new_differentiate( + numer = utility.batch_differentiate( lambda x: self.model.history_rate(self.s, x, self.t, self.T, self.erate)[0], self.h, + nbatch_dim=1, ) self.assertTrue(np.allclose(exact, numer, rtol=1.0e-4)) @@ -39,10 +41,11 @@ def test_flow_history(self): return exact = self.model.dflow_dhist(self.s, self.h, self.t, self.T, self.erate) - numer = utility.new_differentiate( + numer = utility.batch_differentiate( lambda x: self.model.flow_rate(self.s, x, self.t, self.T, self.erate)[0], self.h, - ) + nbatch_dim=1, + ).unsqueeze(1) self.assertTrue(np.allclose(exact, numer, rtol=1.0e-4)) @@ -51,18 +54,20 @@ def test_history_stress(self): return exact = self.model.dhist_dstress(self.s, self.h, self.t, self.T, self.erate) - numer = utility.new_differentiate( + numer = utility.batch_differentiate( lambda x: self.model.history_rate(x, self.h, self.t, self.T, self.erate)[0], self.s, - )[..., 0] + nbatch_dim=1, + ) self.assertTrue(np.allclose(exact, numer, rtol=1.0e-4)) def test_flow_erate(self): exact = self.model.dflow_derate(self.s, self.h, self.t, self.T, self.erate) - numer = utility.new_differentiate( + numer = utility.batch_differentiate( lambda x: self.model.flow_rate(self.s, self.h, self.t, self.T, x)[0], self.erate, + nbatch_dim=1, ) self.assertTrue(np.allclose(exact, torch.flatten(numer), rtol=1.0e-2)) @@ -72,16 +77,121 @@ def test_history_erate(self): return exact = self.model.dhist_derate(self.s, self.h, self.t, self.T, self.erate) - numer = utility.new_differentiate( + numer = utility.batch_differentiate( lambda x: self.model.history_rate(self.s, self.h, self.t, self.T, x)[0], self.erate, - )[..., 0] + nbatch_dim=1, + ) + + self.assertTrue(np.allclose(exact, numer, rtol=1.0e-2, atol=1e-6)) + + +class CommonFlowRuleBatchBatch: + nextra = 6 + + def expand(self, T): + return T.unsqueeze(0).expand((CommonFlowRuleBatchBatch.nextra,) + T.shape) + + def expand_all(self): + return ( + self.expand(self.s), + self.expand(self.h), + self.expand(self.t), + self.expand(self.T), + self.expand(self.erate), + ) + + def test_flow_rate_bb(self): + s, h, t, T, erate = self.expand_all() + + exact = self.model.flow_rate(s, h, t, T, erate)[1] + numer = utility.batch_differentiate( + lambda x: self.model.flow_rate(x, h, t, T, erate)[0], s, nbatch_dim=2 + ) - self.assertTrue(np.allclose(exact, numer, rtol=1.0e-2, atol = 1e-6)) + self.assertEqual(exact.shape, numer.shape) + self.assertTrue(np.allclose(exact, numer, rtol=1.0e-4)) + + def test_history_rate_bb(self): + if self.skip: + return -class TestPerfectViscoplasticity(unittest.TestCase, CommonFlowRule): + s, h, t, T, erate = self.expand_all() + + test, exact = self.model.history_rate(s, h, t, T, erate) + numer = utility.batch_differentiate( + lambda x: self.model.history_rate(s, x, t, T, erate)[0], h, nbatch_dim=2 + ) + + self.assertEqual(exact.shape, numer.shape) + + self.assertTrue(np.allclose(exact, numer, rtol=1.0e-4)) + + def test_flow_history_bb(self): + if self.skip: + return + + s, h, t, T, erate = self.expand_all() + + exact = self.model.dflow_dhist(s, h, t, T, erate) + numer = utility.batch_differentiate( + lambda x: self.model.flow_rate(s, x, t, T, erate)[0], h, nbatch_dim=2 + 
).unsqueeze(-2) + + self.assertEqual(exact.shape, numer.shape) + + self.assertTrue(np.allclose(exact, numer, rtol=1.0e-4)) + + def test_history_stress_bb(self): + if self.skip: + return + + s, h, t, T, erate = self.expand_all() + + exact = self.model.dhist_dstress(s, h, t, T, self.erate) + numer = utility.batch_differentiate( + lambda x: self.model.history_rate(x, h, t, T, erate)[0], s, nbatch_dim=2 + ) + + self.assertEqual(exact.shape, numer.shape) + + self.assertTrue(np.allclose(exact, numer, rtol=1.0e-4)) + + def test_flow_erate_bb(self): + + s, h, t, T, erate = self.expand_all() + + exact = self.model.dflow_derate(s, h, t, T, erate) + numer = utility.batch_differentiate( + lambda x: self.model.flow_rate(s, h, t, T, x)[0], erate, nbatch_dim=2 + ) + + self.assertEqual(exact.shape, numer.shape) + + self.assertTrue(np.allclose(exact, numer, rtol=1.0e-2)) + + def test_history_erate_bb(self): + if self.skip: + return + + s, h, t, T, erate = self.expand_all() + + exact = self.model.dhist_derate(s, h, t, T, erate) + numer = utility.batch_differentiate( + lambda x: self.model.history_rate(s, h, t, T, x)[0], erate, nbatch_dim=2 + ) + + self.assertEqual(exact.shape, numer.shape) + + self.assertTrue(np.allclose(exact, numer, rtol=1.0e-2, atol=1e-6)) + + +class TestPerfectViscoplasticity( + unittest.TestCase, CommonFlowRule, CommonFlowRuleBatchBatch +): def setUp(self): + self.n = torch.tensor(5.0) self.eta = torch.tensor(100.0) @@ -97,7 +207,9 @@ def setUp(self): self.erate = torch.linspace(1e-2, 1e-3, self.nbatch) -class TestWrappedRIIsoKinViscoplasticity(unittest.TestCase, CommonFlowRule): +class TestWrappedRIIsoKinViscoplasticity( + unittest.TestCase, CommonFlowRule, CommonFlowRuleBatchBatch +): def setUp(self): self.n = torch.tensor(5.2) self.eta = torch.tensor(110.0) @@ -152,7 +264,9 @@ def setUp(self): self.skip = False -class TestKocksMeckingRegimeFlowRule(unittest.TestCase, CommonFlowRule): +class TestKocksMeckingRegimeFlowRule( + unittest.TestCase, CommonFlowRule, CommonFlowRuleBatchBatch +): def setUp(self): self.nbatch = 10 @@ -204,8 +318,8 @@ def setUp(self): torch.tensor( np.array( [ - np.linspace(51, 110, self.nbatch)/10.0, - np.linspace(-100, 210, self.nbatch)[::-1]/10.0, + np.linspace(51, 110, self.nbatch) / 10.0, + np.linspace(-100, 210, self.nbatch)[::-1] / 10.0, ] ) ).T, @@ -218,7 +332,10 @@ def setUp(self): self.skip = False -class TestSoftKocksMeckingRegimeFlowRule(unittest.TestCase, CommonFlowRule): + +class TestSoftKocksMeckingRegimeFlowRule( + unittest.TestCase, CommonFlowRule, CommonFlowRuleBatchBatch +): def setUp(self): self.nbatch = 10 @@ -263,7 +380,14 @@ def setUp(self): self.sf = 10.0 self.model = flowrules.SoftKocksMeckingRegimeFlowRule( - self.model1, self.model2, self.g0, self.mu, self.b, self.eps0, self.k, self.sf + self.model1, + self.model2, + self.g0, + self.mu, + self.b, + self.eps0, + self.k, + self.sf, ) self.s = torch.linspace(150, 200, self.nbatch) @@ -285,7 +409,10 @@ def setUp(self): self.skip = False -class TestSoftKocksMeckingRegimeFlowRuleComplex(unittest.TestCase, CommonFlowRule): + +class TestSoftKocksMeckingRegimeFlowRuleComplex( + unittest.TestCase, CommonFlowRule, CommonFlowRuleBatchBatch +): def setUp(self): self.nbatch = 10 @@ -311,15 +438,15 @@ def setUp(self): ) eps0_ri = 1e-10 lmbda = 0.99 - self.model1 = flowrules.RateIndependentFlowRuleWrapper(self.model1p, - lmbda, eps0_ri) - + self.model1 = flowrules.RateIndependentFlowRuleWrapper( + self.model1p, lmbda, eps0_ri + ) + self.s02 = torch.tensor(50.0) self.model2 = 
flowrules.IsoKinViscoplasticity( CP(self.n1), CP(self.eta1), CP(0.0), self.iso1, self.kin1 ) - self.mu = CP(1000.0) self.b = 1.0 self.eps0 = 1.0 @@ -329,7 +456,14 @@ def setUp(self): self.sf = 10.0 self.model = flowrules.SoftKocksMeckingRegimeFlowRule( - self.model1, self.model2, self.g0, self.mu, self.b, self.eps0, self.k, self.sf + self.model1, + self.model2, + self.g0, + self.mu, + self.b, + self.eps0, + self.k, + self.sf, ) self.s = torch.linspace(150, 200, self.nbatch) @@ -337,8 +471,8 @@ def setUp(self): torch.tensor( np.array( [ - np.linspace(51, 110, self.nbatch)/10.0, - np.linspace(-100, 210, self.nbatch)[::-1]/10.0, + np.linspace(51, 110, self.nbatch) / 10.0, + np.linspace(-100, 210, self.nbatch)[::-1] / 10.0, ] ) ).T, @@ -351,7 +485,10 @@ def setUp(self): self.skip = False -class TestIsoKinViscoplasticity(unittest.TestCase, CommonFlowRule): + +class TestIsoKinViscoplasticity( + unittest.TestCase, CommonFlowRule, CommonFlowRuleBatchBatch +): def setUp(self): self.n = torch.tensor(5.2) self.eta = torch.tensor(110.0) @@ -412,7 +549,9 @@ def fn(i): self.assertTrue(np.allclose(i1, i2, rtol=1.0e-4)) -class TestSuperimposedFlowRate(unittest.TestCase, CommonFlowRule): +class TestSuperimposedFlowRate( + unittest.TestCase, CommonFlowRule, CommonFlowRuleBatchBatch +): def setUp(self): self.n1 = torch.tensor(5.2) self.eta1 = torch.tensor(110.0) @@ -470,7 +609,9 @@ def setUp(self): self.skip = False -class TestIsoKinChabocheViscoplasticity(unittest.TestCase, CommonFlowRule): +class TestIsoKinChabocheViscoplasticity( + unittest.TestCase, CommonFlowRule, CommonFlowRuleBatchBatch +): def setUp(self): self.n = torch.tensor(5.2) self.eta = torch.tensor(110.0) diff --git a/test/test_hardening.py b/test/test_hardening.py index 3c88157..7ca422d 100644 --- a/test/test_hardening.py +++ b/test/test_hardening.py @@ -13,7 +13,9 @@ class HardeningBase: def test_dvalue(self): exact = self.model.dvalue(self.h) - numer = utility.new_differentiate(lambda x: self.model.value(x), self.h) + numer = utility.batch_differentiate( + lambda x: self.model.value(x), self.h, nbatch_dim=self.bdim + ) self.assertTrue(np.allclose(exact, numer, rtol=1.0e-4)) @@ -21,24 +23,26 @@ def test_dstress(self): exact = self.model.dhistory_rate_dstress( self.s, self.h, self.t, self.ep, self.T, self.erate ) - numer = utility.new_differentiate( + numer = utility.batch_differentiate( lambda x: self.model.history_rate( x, self.h, self.t, self.ep, self.T, self.erate ), self.s, + nbatch_dim=self.bdim, ) - self.assertTrue(np.allclose(exact, numer[:, :, 0], rtol=1.0e-4)) + self.assertTrue(np.allclose(exact, numer, rtol=1.0e-4)) def test_dhistory(self): exact = self.model.dhistory_rate_dhistory( self.s, self.h, self.t, self.ep, self.T, self.erate ) - numer = utility.new_differentiate( + numer = utility.batch_differentiate( lambda x: self.model.history_rate( self.s, x, self.t, self.ep, self.T, self.erate ), self.h, + nbatch_dim=self.bdim, ) self.assertTrue(np.allclose(exact, numer, rtol=1.0e-3)) @@ -47,27 +51,29 @@ def test_derate(self): exact = self.model.dhistory_rate_derate( self.s, self.h, self.t, self.ep, self.T, self.erate ) - numer = utility.new_differentiate( + numer = utility.batch_differentiate( lambda x: self.model.history_rate( self.s, self.h, self.t, x, self.T, self.erate ), self.ep, + nbatch_dim=self.bdim, ) - self.assertTrue(np.allclose(exact, numer, rtol=1.0e-4)) + self.assertTrue(np.allclose(exact, numer.unsqueeze(-1), rtol=1.0e-4, atol=1e-7)) def test_dtotalrate(self): exact = self.model.dhistory_rate_dtotalrate( self.s, 
self.h, self.t, self.ep, self.T, self.erate ) - numer = utility.new_differentiate( + numer = utility.batch_differentiate( lambda x: self.model.history_rate( self.s, self.h, self.t, self.ep, self.T, x ), self.erate, + nbatch_dim=self.bdim, ) - self.assertTrue(np.allclose(exact, numer[:, :, 0], rtol=1.0e-4)) + self.assertTrue(np.allclose(exact, numer, rtol=1.0e-4)) class TestVoceIsotropicHardening(unittest.TestCase, HardeningBase): @@ -77,6 +83,7 @@ def setUp(self): self.model = hardening.VoceIsotropicHardeningModel(CP(self.R), CP(self.d)) self.nbatch = 10 + self.bdim = 1 self.s = torch.linspace(90, 100, self.nbatch) self.h = torch.reshape(torch.linspace(50, 110, self.nbatch), (self.nbatch, 1)) @@ -86,6 +93,31 @@ def setUp(self): self.erate = torch.linspace(0.01, 0.02, self.nbatch) +class TestVoceIsotropicHardeningMultiBatch(unittest.TestCase, HardeningBase): + def setUp(self): + self.R = torch.tensor(100.0) + self.d = torch.tensor(1.2) + self.model = hardening.VoceIsotropicHardeningModel(CP(self.R), CP(self.d)) + + self.nbatch = 10 + self.bdim = 2 + + self.s = torch.linspace(90, 100, self.nbatch) + self.h = torch.reshape(torch.linspace(50, 110, self.nbatch), (self.nbatch, 1)) + self.t = torch.ones(self.nbatch) + self.ep = torch.linspace(0.1, 0.2, self.nbatch) + self.T = torch.zeros_like(self.t) + self.erate = torch.linspace(0.01, 0.02, self.nbatch) + + self.mbatch = 3 + self.s = self.s.expand((self.mbatch,) + self.s.shape) + self.h = self.h.expand((self.mbatch,) + self.h.shape) + self.t = self.t.expand((self.mbatch,) + self.t.shape) + self.ep = self.s.expand((self.mbatch,) + self.ep.shape) + self.T = self.T.expand((self.mbatch,) + self.T.shape) + self.erate = self.erate.expand((self.mbatch,) + self.erate.shape) + + class TestVoceIsotropicThetaHardening(unittest.TestCase, HardeningBase): def setUp(self): self.tau = torch.tensor(100.0) @@ -95,6 +127,56 @@ def setUp(self): ) self.nbatch = 10 + self.bdim = 1 + + self.s = torch.linspace(90, 100, self.nbatch) + self.h = torch.reshape(torch.linspace(50, 110, self.nbatch), (self.nbatch, 1)) + self.t = torch.ones(self.nbatch) + self.ep = torch.linspace(0.1, 0.2, self.nbatch) + self.T = torch.zeros_like(self.t) + self.erate = torch.linspace(0.01, 0.02, self.nbatch) + + +class TestVoceIsotropicThetaHardeningMultiBatch(unittest.TestCase, HardeningBase): + def setUp(self): + self.tau = torch.tensor(100.0) + self.theta = torch.tensor(12.0) + self.model = hardening.Theta0VoceIsotropicHardeningModel( + CP(self.tau), CP(self.theta) + ) + + self.nbatch = 10 + self.bdim = 2 + + self.s = torch.linspace(90, 100, self.nbatch) + self.h = torch.reshape(torch.linspace(50, 110, self.nbatch), (self.nbatch, 1)) + self.t = torch.ones(self.nbatch) + self.ep = torch.linspace(0.1, 0.2, self.nbatch) + self.T = torch.zeros_like(self.t) + self.erate = torch.linspace(0.01, 0.02, self.nbatch) + + self.mbatch = 3 + self.s = self.s.expand((self.mbatch,) + self.s.shape) + self.h = self.h.expand((self.mbatch,) + self.h.shape) + self.t = self.t.expand((self.mbatch,) + self.t.shape) + self.ep = self.s.expand((self.mbatch,) + self.ep.shape) + self.T = self.T.expand((self.mbatch,) + self.T.shape) + self.erate = self.erate.expand((self.mbatch,) + self.erate.shape) + + +class TestVoceIsotropicThetaRecoveryHardening(unittest.TestCase, HardeningBase): + def setUp(self): + self.tau = torch.tensor(100.0) + self.theta = torch.tensor(12.0) + self.r1 = 0.1 + self.r2 = 1.2 + self.R0 = 10.0 + self.model = hardening.Theta0RecoveryVoceIsotropicHardeningModel( + CP(self.tau), CP(self.theta), 
CP(self.R0), CP(self.r1), CP(self.r2) + ) + + self.nbatch = 10 + self.bdim = 1 self.s = torch.linspace(90, 100, self.nbatch) self.h = torch.reshape(torch.linspace(50, 110, self.nbatch), (self.nbatch, 1)) @@ -104,7 +186,9 @@ def setUp(self): self.erate = torch.linspace(0.01, 0.02, self.nbatch) -class TestVoceIsotropicThetaReceoveryHardening(unittest.TestCase, HardeningBase): +class TestVoceIsotropicThetaRecoveryHardeningMultiBatch( + unittest.TestCase, HardeningBase +): def setUp(self): self.tau = torch.tensor(100.0) self.theta = torch.tensor(12.0) @@ -116,6 +200,7 @@ def setUp(self): ) self.nbatch = 10 + self.bdim = 2 self.s = torch.linspace(90, 100, self.nbatch) self.h = torch.reshape(torch.linspace(50, 110, self.nbatch), (self.nbatch, 1)) @@ -124,6 +209,14 @@ def setUp(self): self.T = torch.zeros_like(self.t) self.erate = torch.linspace(0.01, 0.02, self.nbatch) + self.mbatch = 3 + self.s = self.s.expand((self.mbatch,) + self.s.shape) + self.h = self.h.expand((self.mbatch,) + self.h.shape) + self.t = self.t.expand((self.mbatch,) + self.t.shape) + self.ep = self.s.expand((self.mbatch,) + self.ep.shape) + self.T = self.T.expand((self.mbatch,) + self.T.shape) + self.erate = self.erate.expand((self.mbatch,) + self.erate.shape) + class TestFAKinematicHardening(unittest.TestCase, HardeningBase): def setUp(self): @@ -132,6 +225,7 @@ def setUp(self): self.model = hardening.FAKinematicHardeningModel(CP(self.C), CP(self.g)) self.nbatch = 10 + self.bdim = 1 self.s = torch.linspace(90, 100, self.nbatch) self.h = torch.reshape(torch.linspace(50, 110, self.nbatch), (self.nbatch, 1)) @@ -141,6 +235,31 @@ def setUp(self): self.erate = torch.linspace(0.01, 0.02, self.nbatch) +class TestFAKinematicHardeningMultiBatch(unittest.TestCase, HardeningBase): + def setUp(self): + self.C = torch.tensor(100.0) + self.g = torch.tensor(1.2) + self.model = hardening.FAKinematicHardeningModel(CP(self.C), CP(self.g)) + + self.nbatch = 10 + self.bdim = 2 + + self.s = torch.linspace(90, 100, self.nbatch) + self.h = torch.reshape(torch.linspace(50, 110, self.nbatch), (self.nbatch, 1)) + self.t = torch.ones(self.nbatch) + self.ep = torch.linspace(0.1, 0.2, self.nbatch) + self.T = torch.zeros_like(self.t) + self.erate = torch.linspace(0.01, 0.02, self.nbatch) + + self.mbatch = 3 + self.s = self.s.expand((self.mbatch,) + self.s.shape) + self.h = self.h.expand((self.mbatch,) + self.h.shape) + self.t = self.t.expand((self.mbatch,) + self.t.shape) + self.ep = self.s.expand((self.mbatch,) + self.ep.shape) + self.T = self.T.expand((self.mbatch,) + self.T.shape) + self.erate = self.erate.expand((self.mbatch,) + self.erate.shape) + + class TestFAKinematicHardeningRecovery(unittest.TestCase, HardeningBase): def setUp(self): self.C = torch.tensor(100.0) @@ -152,6 +271,7 @@ def setUp(self): ) self.nbatch = 10 + self.bdim = 1 self.s = torch.linspace(90, 100, self.nbatch) self.h = torch.reshape(torch.linspace(50, 110, self.nbatch), (self.nbatch, 1)) @@ -161,6 +281,35 @@ def setUp(self): self.erate = torch.linspace(0.01, 0.02, self.nbatch) +class TestFAKinematicHardeningRecoveryMultiBatch(unittest.TestCase, HardeningBase): + def setUp(self): + self.C = torch.tensor(100.0) + self.g = torch.tensor(1.2) + self.b = torch.tensor(5.0e-4) + self.r = torch.tensor(3.0) + self.model = hardening.FAKinematicHardeningModel( + CP(self.C), CP(self.g), CP(self.b), CP(self.r) + ) + + self.nbatch = 10 + self.bdim = 2 + + self.s = torch.linspace(90, 100, self.nbatch) + self.h = torch.reshape(torch.linspace(50, 110, self.nbatch), (self.nbatch, 1)) + self.t = 
torch.ones(self.nbatch) + self.ep = torch.linspace(0.1, 0.2, self.nbatch) + self.T = torch.zeros_like(self.t) + self.erate = torch.linspace(0.01, 0.02, self.nbatch) + + self.mbatch = 3 + self.s = self.s.expand((self.mbatch,) + self.s.shape) + self.h = self.h.expand((self.mbatch,) + self.h.shape) + self.t = self.t.expand((self.mbatch,) + self.t.shape) + self.ep = self.s.expand((self.mbatch,) + self.ep.shape) + self.T = self.T.expand((self.mbatch,) + self.T.shape) + self.erate = self.erate.expand((self.mbatch,) + self.erate.shape) + + class TestSuperimposedKinematicHardening(unittest.TestCase, HardeningBase): def setUp(self): self.C1 = torch.tensor(100.0) @@ -176,6 +325,7 @@ def setUp(self): ) self.nbatch = 10 + self.bdim = 1 self.s = torch.linspace(90, 100, self.nbatch) self.h = torch.reshape( @@ -215,6 +365,41 @@ def test_correct_rate(self): ) +class TestSuperimposedKinematicHardeningMultiBatch(unittest.TestCase, HardeningBase): + def setUp(self): + self.C1 = torch.tensor(100.0) + self.g1 = torch.tensor(1.2) + self.model1 = hardening.FAKinematicHardeningModel(CP(self.C1), CP(self.g1)) + + self.C2 = torch.tensor(12.0) + self.g2 = torch.tensor(1.5) + self.model2 = hardening.FAKinematicHardeningModel(CP(self.C2), CP(self.g2)) + + self.model = hardening.SuperimposedKinematicHardening( + [self.model1, self.model2] + ) + + self.nbatch = 10 + self.bdim = 2 + + self.s = torch.linspace(90, 100, self.nbatch) + self.h = torch.reshape( + torch.linspace(50, 110, 2 * self.nbatch), (self.nbatch, 2) + ) + self.t = torch.ones(self.nbatch) + self.ep = torch.linspace(0.1, 0.2, self.nbatch) + self.T = torch.zeros_like(self.t) + self.erate = torch.linspace(0.01, 0.02, self.nbatch) + + self.mbatch = 3 + self.s = self.s.expand((self.mbatch,) + self.s.shape) + self.h = self.h.expand((self.mbatch,) + self.h.shape) + self.t = self.t.expand((self.mbatch,) + self.t.shape) + self.ep = self.s.expand((self.mbatch,) + self.ep.shape) + self.T = self.T.expand((self.mbatch,) + self.T.shape) + self.erate = self.erate.expand((self.mbatch,) + self.erate.shape) + + class TestChabocheKinematicHardening(unittest.TestCase, HardeningBase): def setUp(self): self.C = torch.tensor([100.0, 1000, 1500]) @@ -222,6 +407,7 @@ def setUp(self): self.model = hardening.ChabocheHardeningModel(CP(self.C), CP(self.g)) self.nbatch = 10 + self.bdim = 1 self.s = torch.linspace(90, 100, self.nbatch) self.h = torch.reshape( @@ -234,6 +420,34 @@ def setUp(self): self.erate = torch.linspace(0.01, 0.02, self.nbatch) +class TestChabocheKinematicHardeningMultiBatch(unittest.TestCase, HardeningBase): + def setUp(self): + self.C = torch.tensor([100.0, 1000, 1500]) + self.g = torch.tensor([1.2, 100, 50]) + self.model = hardening.ChabocheHardeningModel(CP(self.C), CP(self.g)) + + self.nbatch = 10 + self.bdim = 2 + + self.s = torch.linspace(90, 100, self.nbatch) + self.h = torch.reshape( + torch.linspace(50, 110, self.nbatch * len(self.C)), + (self.nbatch, len(self.C)), + ) + self.t = torch.ones(self.nbatch) + self.ep = torch.linspace(0.1, 0.2, self.nbatch) + self.T = torch.zeros_like(self.t) + self.erate = torch.linspace(0.01, 0.02, self.nbatch) + + self.mbatch = 3 + self.s = self.s.expand((self.mbatch,) + self.s.shape) + self.h = self.h.expand((self.mbatch,) + self.h.shape) + self.t = self.t.expand((self.mbatch,) + self.t.shape) + self.ep = self.s.expand((self.mbatch,) + self.ep.shape) + self.T = self.T.expand((self.mbatch,) + self.T.shape) + self.erate = self.erate.expand((self.mbatch,) + self.erate.shape) + + class 
TestChabocheKinematicHardeningRecovery(unittest.TestCase, HardeningBase): def setUp(self): self.C = torch.tensor([100.0, 1000, 1500]) @@ -245,6 +459,33 @@ def setUp(self): ) self.nbatch = 10 + self.bdim = 1 + + self.s = torch.linspace(90, 100, self.nbatch) + self.h = torch.reshape( + torch.linspace(50, 110, self.nbatch * len(self.C)), + (self.nbatch, len(self.C)), + ) + self.t = torch.ones(self.nbatch) + self.ep = torch.linspace(0.1, 0.2, self.nbatch) + self.T = torch.zeros_like(self.t) + self.erate = torch.linspace(0.01, 0.02, self.nbatch) + + +class TestChabocheKinematicHardeningRecoveryMultiBatch( + unittest.TestCase, HardeningBase +): + def setUp(self): + self.C = torch.tensor([100.0, 1000, 1500]) + self.g = torch.tensor([1.2, 100, 50]) + self.b = torch.tensor([5e-4, 4e-4, 2e-4]) + self.r = torch.tensor([3.0, 3.2, 3.5]) + self.model = hardening.ChabocheHardeningModelRecovery( + CP(self.C), CP(self.g), CP(self.b), CP(self.r) + ) + + self.nbatch = 10 + self.bdim = 2 self.s = torch.linspace(90, 100, self.nbatch) self.h = torch.reshape( @@ -255,3 +496,11 @@ def setUp(self): self.ep = torch.linspace(0.1, 0.2, self.nbatch) self.T = torch.zeros_like(self.t) self.erate = torch.linspace(0.01, 0.02, self.nbatch) + + self.mbatch = 3 + self.s = self.s.expand((self.mbatch,) + self.s.shape) + self.h = self.h.expand((self.mbatch,) + self.h.shape) + self.t = self.t.expand((self.mbatch,) + self.t.shape) + self.ep = self.s.expand((self.mbatch,) + self.ep.shape) + self.T = self.T.expand((self.mbatch,) + self.T.shape) + self.erate = self.erate.expand((self.mbatch,) + self.erate.shape) diff --git a/test/test_model_gradient.py b/test/test_model_gradient.py index f3b9c03..048e279 100644 --- a/test/test_model_gradient.py +++ b/test/test_model_gradient.py @@ -10,7 +10,6 @@ from pyoptmat.temperature import ConstantParameter as CP torch.set_default_tensor_type(torch.DoubleTensor) -torch.autograd.set_detect_anomaly(True) def differ(mfn, p0, eps=1.0e-6): diff --git a/test/test_models.py b/test/test_models.py index 75ec370..3b19f60 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -23,10 +23,14 @@ def test_derivs_strain(self): ) strain_rates[torch.isnan(strain_rates)] = 0 - erate_interpolator = utility.CheaterBatchTimeSeriesInterpolator( + #print(self.times.shape) + #print(strain_rates.shape) + #print(self.t.shape) + + erate_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( self.times, strain_rates ) - temperature_interpolator = utility.CheaterBatchTimeSeriesInterpolator( + temperature_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( self.times, self.temperatures ) @@ -34,7 +38,7 @@ def test_derivs_strain(self): self.model, erate_interpolator, temperature_interpolator ) v, dv = use.forward(self.t, self.state_strain) - ddv = utility.new_differentiate( + ddv = utility.batch_differentiate( lambda x: use.forward(self.t, x)[0], self.state_strain ) @@ -50,13 +54,13 @@ def test_derivs_stress(self): ) stress_rates[torch.isnan(stress_rates)] = 0 - stress_rate_interpolator = utility.CheaterBatchTimeSeriesInterpolator( + stress_rate_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( self.times, stress_rates ) - stress_interpolator = utility.CheaterBatchTimeSeriesInterpolator( + stress_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( self.times, self.stresses ) - temperature_interpolator = utility.CheaterBatchTimeSeriesInterpolator( + temperature_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( self.times, self.temperatures ) @@ -68,7 +72,7 @@ def 
test_derivs_stress(self): ) v, dv = use.forward(self.t, self.state_stress) - ddv = utility.new_differentiate( + ddv = utility.batch_differentiate( lambda x: use.forward(self.t, x)[0], self.state_stress ) @@ -89,10 +93,11 @@ def test_partial_state(self): erate = strain_rates[self.step] _, exact, _, _ = self.model.forward(t, self.state_strain, erate, T) - numer = utility.new_differentiate( - lambda y: self.model.forward(t, y, erate, T)[0], self.state_strain) - - self.assertTrue(torch.allclose(exact, numer, atol = 1e-4, rtol = 1e-4)) + numer = utility.batch_differentiate( + lambda y: self.model.forward(t, y, erate, T)[0], self.state_strain + ) + + self.assertTrue(torch.allclose(exact, numer, atol=1e-4, rtol=1e-4)) def test_partial_erate(self): strain_rates = torch.cat( @@ -109,13 +114,146 @@ def test_partial_erate(self): erate = strain_rates[self.step] _, _, exact, _ = self.model.forward(t, self.state_strain, erate, T) - numer = utility.new_differentiate( - lambda y: self.model.forward(t, self.state_strain, y, T)[0], erate).squeeze(-1) + numer = utility.batch_differentiate( + lambda y: self.model.forward(t, self.state_strain, y, T)[0], erate + ).squeeze(-1) - self.assertTrue(torch.allclose(exact, numer, atol = 1e-4, rtol = 1e-4)) + self.assertTrue(torch.allclose(exact, numer, atol=1e-4, rtol=1e-4)) -class TestPerfectViscoplasticity(unittest.TestCase, CommonModel): +class CommonModelBatchBatch: + nextra = 6 + + def expand(self, T): + return T.unsqueeze(0).expand((CommonModelBatchBatch.nextra,) + T.shape) + + def test_derivs_strain_bb(self): + strain_rates = torch.cat( + ( + torch.zeros(1, self.strains.shape[1]), + (self.strains[1:] - self.strains[:-1]) + / (self.times[1:] - self.times[:-1]), + ) + ) + strain_rates[torch.isnan(strain_rates)] = 0 + + erate_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( + self.times, strain_rates + ) + temperature_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( + self.times, self.temperatures + ) + + use = models.StrainBasedModel( + self.model, erate_interpolator, temperature_interpolator + ) + v, dv = use.forward(self.expand(self.t), self.expand(self.state_strain)) + ddv = utility.batch_differentiate( + lambda x: use.forward(self.expand(self.t), x)[0], + self.expand(self.state_strain), + nbatch_dim=2, + ) + + self.assertTrue(np.allclose(dv, ddv, rtol=1e-4, atol=1e-4)) + + def test_derivs_stress_bb(self): + stress_rates = torch.cat( + ( + torch.zeros(1, self.stresses.shape[1]), + (self.stresses[1:] - self.stresses[:-1]) + / (self.times[1:] - self.times[:-1]), + ) + ) + stress_rates[torch.isnan(stress_rates)] = 0 + + stress_rate_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( + self.times, stress_rates + ) + stress_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( + self.times, self.stresses + ) + temperature_interpolator = utility.ArbitraryBatchTimeSeriesInterpolator( + self.times, self.temperatures + ) + + use = models.StressBasedModel( + self.model, + stress_rate_interpolator, + stress_interpolator, + temperature_interpolator, + ) + + v, dv = use.forward(self.expand(self.t), self.expand(self.state_stress)) + ddv = utility.batch_differentiate( + lambda x: use.forward(self.expand(self.t), x)[0], + self.expand(self.state_stress), + nbatch_dim=2, + ) + + self.assertTrue(np.allclose(dv, ddv, rtol=1e-4, atol=1e-4)) + + def test_partial_state_bb(self): + strain_rates = torch.cat( + ( + torch.zeros(1, self.strains.shape[1]), + (self.strains[1:] - self.strains[:-1]) + / (self.times[1:] - self.times[:-1]), + ) + ) + 
strain_rates[torch.isnan(strain_rates)] = 0 + + t = self.times[self.step] + T = self.temperatures[self.step] + erate = strain_rates[self.step] + + _, exact, _, _ = self.model.forward( + self.expand(t), + self.expand(self.state_strain), + self.expand(erate), + self.expand(T), + ) + numer = utility.batch_differentiate( + lambda y: self.model.forward( + self.expand(t), y, self.expand(erate), self.expand(T) + )[0], + self.expand(self.state_strain), + nbatch_dim=2, + ) + + self.assertTrue(torch.allclose(exact, numer, atol=1e-4, rtol=1e-4)) + + def test_partial_erate_bb(self): + strain_rates = torch.cat( + ( + torch.zeros(1, self.strains.shape[1]), + (self.strains[1:] - self.strains[:-1]) + / (self.times[1:] - self.times[:-1]), + ) + ) + strain_rates[torch.isnan(strain_rates)] = 0 + + t = self.times[self.step] + T = self.temperatures[self.step] + erate = strain_rates[self.step] + + _, _, exact, _ = self.model.forward( + self.expand(t), + self.expand(self.state_strain), + self.expand(erate), + self.expand(T), + ) + numer = utility.batch_differentiate( + lambda y: self.model.forward( + self.expand(t), self.expand(self.state_strain), y, self.expand(T) + )[0], + self.expand(erate), + nbatch_dim=2, + ) + + self.assertTrue(torch.allclose(exact, numer, atol=1e-4, rtol=1e-4)) + + +class TestPerfectViscoplasticity(unittest.TestCase, CommonModel, CommonModelBatchBatch): def setUp(self): self.E = torch.tensor(100000.0) self.n = torch.tensor(5.2) @@ -146,7 +284,8 @@ def setUp(self): self.model = models.InelasticModel(CP(self.E), self.flowrule) self.step = 2 -class TestIsoKinViscoplasticity(unittest.TestCase, CommonModel): + +class TestIsoKinViscoplasticity(unittest.TestCase, CommonModel, CommonModelBatchBatch): def setUp(self): self.E = torch.tensor(100000.0) self.n = torch.tensor(5.2) @@ -197,7 +336,9 @@ def setUp(self): self.step = 2 -class TestIsoKinViscoplasticityRecovery(unittest.TestCase, CommonModel): +class TestIsoKinViscoplasticityRecovery( + unittest.TestCase, CommonModel, CommonModelBatchBatch +): def setUp(self): self.E = torch.tensor(100000.0) self.n = torch.tensor(5.2) @@ -253,7 +394,7 @@ def setUp(self): self.step = 2 -class TestDamage(unittest.TestCase, CommonModel): +class TestDamage(unittest.TestCase, CommonModel, CommonModelBatchBatch): def setUp(self): self.E = torch.tensor(100000.0) self.n = torch.tensor(5.2) @@ -305,7 +446,7 @@ def setUp(self): self.step = 2 -class TestAll(unittest.TestCase, CommonModel): +class TestAll(unittest.TestCase, CommonModel, CommonModelBatchBatch): def setUp(self): self.E = torch.tensor(100000.0) self.n = torch.tensor(5.2) diff --git a/test/test_ode.py b/test/test_ode.py index 3740a1e..e433921 100644 --- a/test/test_ode.py +++ b/test/test_ode.py @@ -19,15 +19,15 @@ def __init__(self, r, K): def forward(self, t, y): return ( self.r * (1.0 - y / self.K) * y, - (self.r - (2 * self.r * y) / self.K)[:, :, None], + (self.r - (2 * self.r * y) / self.K)[..., None], ) def exact(self, t, y0): return ( self.K * torch.exp(self.r * t) - * y0[:, 0] - / (self.K + (torch.exp(self.r * t) - 1) * y0[:, 0]) + * y0[..., 0] + / (self.K + (torch.exp(self.r * t) - 1) * y0[..., 0]) )[..., None] @@ -58,10 +58,44 @@ def test_backward_default(self): self.assertTrue(torch.max(torch.abs((numerical - exact) / exact)) < 1.0e-2) - def test_backward_full(self): + +class TestBatchSimpleChunkTime(unittest.TestCase): + def setUp(self): + self.r = 1.0 + self.K = 1.0 + + self.model = LogisticODE(self.r, self.K) + self.nbatch = 20 + self.y0 = torch.linspace(0.51, 0.76, 
self.nbatch).reshape(self.nbatch, 1) + + self.nsteps = 100 + + self.tchunk = 3 + + self.times = torch.tensor( + np.array([np.linspace(0, 7.5, self.nsteps) for i in range(self.nbatch)]).T + ) + + def test_forward_default(self): exact = self.model.exact(self.times, self.y0) numerical = ode.odeint( - self.model, self.y0, self.times, method="backward-euler", solver_method="lu" + self.model, + self.y0, + self.times, + method="forward-euler", + block_size=self.tchunk, + ) + + self.assertTrue(torch.max(torch.abs((numerical - exact) / exact)) < 1.0e-2) + + def test_backward_default(self): + exact = self.model.exact(self.times, self.y0) + numerical = ode.odeint( + self.model, + self.y0, + self.times, + method="backward-euler", + block_size=self.tchunk, ) self.assertTrue(torch.max(torch.abs((numerical - exact) / exact)) < 1.0e-2) @@ -103,7 +137,7 @@ def exact(self, t, y0): 4.0 * (1 - torch.exp(-8.0 * t)), ), dim=1, - ) + ).permute(0, 2, 1) class TestFallingBatch(unittest.TestCase): @@ -115,7 +149,8 @@ def setUp(self): self.nbatch = 20 self.y0 = torch.zeros(self.nbatch, 2) - self.nsteps = 100 + self.nsteps = 1000 + self.nchunk = 10 self.times = torch.tensor( np.array([np.linspace(0, 5.0, self.nsteps) for i in range(self.nbatch)]).T ) @@ -123,39 +158,28 @@ def setUp(self): self.model = FallingODE(self.m, self.w, self.k) def test_forward(self): - exact = self.model.exact(self.times, self.y0).permute(0, 2, 1) + exact = self.model.exact(self.times, self.y0) numerical = ode.odeint( - self.model, self.y0, self.times, method="forward-euler", substep=100 + self.model, + self.y0, + self.times, + method="forward-euler", + block_size=self.nchunk, ) - self.assertTrue( - torch.max(torch.abs((numerical[1:] - exact[1:]) / exact[1:])) < 1.0e-2 - ) + self.assertTrue(torch.max(torch.abs((numerical[1:] - exact[1:]))) < 1.0e-1) def test_backward(self): - exact = self.model.exact(self.times, self.y0).permute(0, 2, 1) - numerical = ode.odeint( - self.model, self.y0, self.times, method="backward-euler", substep=10 - ) - - self.assertTrue( - torch.max(torch.abs((numerical[1:] - exact[1:]) / exact[1:])) < 1.0e-1 - ) - - def test_backward_lu(self): - exact = self.model.exact(self.times, self.y0).permute(0, 2, 1) + exact = self.model.exact(self.times, self.y0) numerical = ode.odeint( self.model, self.y0, self.times, method="backward-euler", - substep=10, - solver_method="lu", + block_size=self.nchunk, ) - self.assertTrue( - torch.max(torch.abs((numerical[1:] - exact[1:]) / exact[1:])) < 1.0e-1 - ) + self.assertTrue(torch.max(torch.abs((numerical[1:] - exact[1:]))) < 1.0e-1) class FallingParameterizedODE(torch.nn.Module): @@ -197,18 +221,14 @@ def exact(self, t, y0): ) -def run_model( - model, nbatch=4, method="forward-euler", nsteps=10, substep=2, solver_method="diag" -): +def run_model(model, nbatch=4, method="forward-euler", nsteps=10): y0 = torch.zeros(nbatch, 2) times = torch.tensor( np.array([np.linspace(0, 1.0, nsteps) for i in range(nbatch)]).T ) - numerical = ode.odeint_adjoint( - model, y0, times, method=method, substep=substep, solver_method=solver_method - ) + numerical = ode.odeint_adjoint(model, y0, times, method=method) return torch.norm(numerical) @@ -315,55 +335,3 @@ def test_grad_backward(self): self.assertAlmostEqual(dm, dmp, places=3) self.assertAlmostEqual(dw, dwp, places=3) self.assertAlmostEqual(dk, dwk, places=3) - - def test_grad_backward_lu(self): - model = FallingParameterizedODE(self.m, self.w, self.k) - - res1 = run_model(model, method="backward-euler", solver_method="lu") - res1.backward() - - r0 = 
res1.data.numpy() - - dm = model.m.grad.numpy() - dw = model.w.grad.numpy() - dk = model.k.grad.numpy() - - with torch.no_grad(): - eps = 1.0e-6 - dmp = ( - ( - run_model( - FallingParameterizedODE(self.m + self.m * eps, self.w, self.k), - method="backward-euler", - solver_method="lu", - ) - - r0 - ) - / (eps * self.m) - ).numpy() - dwp = ( - ( - run_model( - FallingParameterizedODE(self.m, self.w * (1 + eps), self.k), - method="backward-euler", - solver_method="lu", - ) - - r0 - ) - / (eps * self.w) - ).numpy() - dwk = ( - ( - run_model( - FallingParameterizedODE(self.m, self.w, self.k * (1 + eps)), - method="backward-euler", - solver_method="lu", - ) - - r0 - ) - / (eps * self.k) - ).numpy() - - self.assertAlmostEqual(dm, dmp, places=3) - self.assertAlmostEqual(dw, dwp, places=3) - self.assertAlmostEqual(dk, dwk, places=3) diff --git a/test/test_scaling.py b/test/test_scaling.py index 1d67e69..6ee9717 100644 --- a/test/test_scaling.py +++ b/test/test_scaling.py @@ -10,9 +10,6 @@ def test_forward_back(self): actual = self.f.scale(self.x) back = self.f.unscale(actual) - print(self.x) - print(back) - self.assertTrue(torch.allclose(self.x, back)) diff --git a/test/test_solvers.py b/test/test_solvers.py index 62c4034..32018f5 100644 --- a/test/test_solvers.py +++ b/test/test_solvers.py @@ -6,23 +6,26 @@ torch.set_default_dtype(torch.float64) + class LinearSolverBase: def test_correct(self): - A = torch.rand((self.nbatch,self.N,self.N)) - b = torch.rand((self.nbatch,self.N)) - - x = self.method(A,b) + A = torch.rand((self.nbatch, self.N, self.N)) + b = torch.rand((self.nbatch, self.N)) + + x = self.method(A, b) xp = torch.linalg.solve(A, b) self.assertTrue(torch.allclose(x, xp)) + class TestLU(unittest.TestCase, LinearSolverBase): def setUp(self): self.nbatch = 10 self.N = 8 - self.method = solvers.lu_linear_solve + self.method = solvers.lu_linear_solve + class TestGMRES(unittest.TestCase, LinearSolverBase): def setUp(self): @@ -31,6 +34,7 @@ def setUp(self): self.method = solvers.gmres + class TestGMRESDiag(unittest.TestCase, LinearSolverBase): def setUp(self): self.nbatch = 20 @@ -38,6 +42,7 @@ def setUp(self): self.method = solvers.jacobi_gmres + class TestGMRESLU(unittest.TestCase, LinearSolverBase): def setUp(self): self.nbatch = 20 diff --git a/test/test_temperature_scaling.py b/test/test_temperature_scaling.py index 2c5f97a..e20bb9f 100644 --- a/test/test_temperature_scaling.py +++ b/test/test_temperature_scaling.py @@ -15,6 +15,66 @@ def test_value(self): self.assertTrue(np.allclose(pshould.numpy(), pval.numpy())) + def test_value_batch(self): + pshould = torch.tensor([1.0, 2.0]) + obj = temperature.ConstantParameter(pshould) + pval = obj(torch.linspace(1.0, 10.0, 10)) + + self.assertTrue(np.allclose(np.tile(pshould[None, :], (10, 1)), pval.numpy())) + + def test_value_batch_batch(self): + pshould = torch.tensor([1.0, 2.0]) + obj = temperature.ConstantParameter(pshould) + pval = obj(torch.linspace(1.0, 10.0, 10).unsqueeze(0).expand(4, 10)) + + self.assertTrue( + np.allclose(np.tile(pshould[None, None, :], (4, 10, 1)), pval.numpy()) + ) + + +class TestArrheniusScaling(unittest.TestCase): + def test_value(self): + A = 1.2 + Q = 100.0 + + T = 100.0 + + obj = temperature.ArrheniusScaling(torch.tensor(A), torch.tensor(Q)) + y1 = obj.value(torch.tensor(T)) + y2 = A * np.exp(-Q / T) + + self.assertTrue(np.allclose(y1.numpy(), y2)) + + def test_value_batch(self): + A = torch.linspace(1.2, 1.3, 50) + Q = torch.linspace(100.0, 120, 50) + + T = torch.linspace(100.0, 200, 50) + + obj = 
temperature.ArrheniusScaling(A, Q) + y1 = obj.value(T) + y2 = A.numpy() * np.exp(-Q.numpy() / T.numpy()) + + self.assertEqual(y1.shape, (50,)) + self.assertEqual(y2.shape, (50,)) + + self.assertTrue(np.allclose(y1.numpy(), y2)) + + def test_value_batch_batch(self): + A = torch.linspace(1.2, 1.3, 50) + Q = torch.linspace(100.0, 120, 50) + + T = torch.linspace(100.0, 200, 50) + + obj = temperature.ArrheniusScaling(A, Q) + y1 = obj.value(T.unsqueeze(0).expand(7, 50)) + y2 = np.tile((A.numpy() * np.exp(-Q.numpy() / T.numpy()))[None, ...], (7, 1)) + + self.assertEqual(y1.shape, (7, 50)) + self.assertEqual(y2.shape, (7, 50)) + + self.assertTrue(np.allclose(y1.numpy(), y2)) + class TestPolynomialScaling(unittest.TestCase): def test_value(self): @@ -25,6 +85,9 @@ def test_value(self): y2 = np.polyval(coefs.numpy(), x) + self.assertEqual(y1.shape, (100,)) + self.assertEqual(y2.shape, (100,)) + self.assertTrue(np.allclose(y1.numpy(), y2)) def test_value_batch(self): @@ -36,6 +99,23 @@ def test_value_batch(self): y2 = np.polyval(coefs.numpy(), x) + self.assertEqual(y1.shape, (100,)) + self.assertEqual(y2.shape, (100,)) + + self.assertTrue(np.allclose(y1.numpy(), y2)) + + def test_value_batch_batch(self): + coefs = torch.tensor([[1.1] * 100, [2.5] * 100, [3.0] * 100]) + + x = torch.ones((100,)) * 1.51 + obj = temperature.PolynomialScaling(coefs) + y1 = obj.value(x.unsqueeze(0).expand(10, 100)) + + y2 = np.tile(np.polyval(coefs.numpy(), x)[None, ...], (10, 1)) + + self.assertEqual(y1.shape, (10, 100)) + self.assertEqual(y2.shape, (10, 100)) + self.assertTrue(np.allclose(y1.numpy(), y2)) def test_value_constant(self): @@ -54,7 +134,7 @@ def test_value(self): points = torch.tensor([0.0, 10.0, 20.0, 30.0]) values = torch.tensor([1.0, 2.0, 4.0, -1.0]) - x = torch.linspace(0.1, 29.9, 5) + x = torch.linspace(0.1, 29.9, 5)[torch.randperm(5)] obj = temperature.PiecewiseScaling(points, values) y1 = obj.value(x) @@ -71,17 +151,17 @@ def test_value_batch(self): values = torch.tensor( np.array( [ - np.linspace(0, 1, nbatch), - np.linspace(1, -1, nbatch), - np.linspace(-1, 3, nbatch), - np.linspace(3, 4, nbatch), + np.random.rand(nbatch), + np.random.rand(nbatch), + np.random.rand(nbatch), + np.random.rand(nbatch), ] ) ).T obj = temperature.PiecewiseScaling(points, values) - x = torch.linspace(0.1, 29.9, nbatch) + x = torch.linspace(0.1, 29.9, nbatch)[torch.randperm(nbatch)] y1 = obj.value(x) @@ -91,6 +171,76 @@ def test_value_batch(self): ifn = inter.interp1d(points.numpy(), values[i].numpy()) y2[i] = ifn(x[i].numpy()) + self.assertEqual(y1.shape, (50,)) + self.assertEqual(y2.shape, (50,)) + + self.assertTrue(np.allclose(y1, y2)) + + def test_value_batch_batch(self): + nbatch = 50 + points = torch.tensor([0.0, 10.0, 20.0, 30.0]) + + values = torch.tensor( + np.array( + [ + np.random.rand(nbatch), + np.random.rand(nbatch), + np.random.rand(nbatch), + np.random.rand(nbatch), + ] + ) + ).T + + obj = temperature.PiecewiseScaling(points, values) + + xt = torch.rand(7,nbatch) * 28.8 + 0.1 + + y1 = obj.value(xt) + + y2 = np.zeros(y1.numpy().shape) + + for j in range(7): + for i in range(nbatch): + ifn = inter.interp1d(points.numpy(), values[i].numpy()) + y2[j, i] = ifn(xt[j,i].numpy()) + + self.assertEqual(y1.shape, (7, 50)) + self.assertEqual(y2.shape, (7, 50)) + + self.assertTrue(np.allclose(y1, y2)) + + def test_value_batch_batch_lb(self): + nbatch = 50 + points = torch.tensor([0.0, 10.0, 20.0, 30.0]) + + values = torch.tensor( + np.array( + [ + np.random.rand(nbatch), + np.random.rand(nbatch), + 
np.random.rand(nbatch), + np.random.rand(nbatch), + ] + ) + ).T + + obj = temperature.PiecewiseScaling(points, values) + + xt = torch.rand(7,nbatch) * 28.8 + 0.1 + xt[0,0] = 0.0 + + y1 = obj.value(xt) + + y2 = np.zeros(y1.numpy().shape) + + for j in range(7): + for i in range(nbatch): + ifn = inter.interp1d(points.numpy(), values[i].numpy()) + y2[j, i] = ifn(xt[j,i].numpy()) + + self.assertEqual(y1.shape, (7, 50)) + self.assertEqual(y2.shape, (7, 50)) + self.assertTrue(np.allclose(y1, y2)) @@ -125,6 +275,80 @@ def test_value_batch(self): v2 = self.A * self.mu(Ts) + self.assertEqual(v1.shape, (50,)) + self.assertEqual(v2.shape, (50,)) + + self.assertTrue(np.allclose(v1.numpy(), v2)) + + def test_value_batch_batch(self): + nbatch = 50 + self.A = torch.linspace(0.01, 0.1, nbatch) + + Ts = torch.linspace(25, 950, nbatch) + 273.15 + + obj = temperature.ShearModulusScaling(self.A, self.mu) + + v1 = obj.value(Ts.unsqueeze(0).expand((4, 50))) + + v2 = np.tile((self.A * self.mu(Ts))[None, ...], (4, 1)) + + self.assertEqual(v1.shape, (4, 50)) + self.assertEqual(v2.shape, (4, 50)) + + self.assertTrue(np.allclose(v1.numpy(), v2)) + + +class TestShearModulusScalingExp(unittest.TestCase): + def setUp(self): + self.mu = temperature.PolynomialScaling( + torch.tensor([-1.34689305e-02, -5.18806776e00, 7.86708330e04]) + ) + + def test_value(self): + self.A = -2.1 + + Ts = torch.linspace(25, 950, 50) + 273.15 + + obj = temperature.ShearModulusScalingExp(torch.tensor(self.A), self.mu) + + v1 = obj.value(Ts) + + v2 = np.exp(self.A) * self.mu(Ts) + + self.assertTrue(np.allclose(v1.numpy(), v2)) + + def test_value_batch(self): + nbatch = 50 + self.A = torch.linspace(-2.1, -1.0, nbatch) + + Ts = torch.linspace(25, 950, nbatch) + 273.15 + + obj = temperature.ShearModulusScalingExp(self.A, self.mu) + + v1 = obj.value(Ts) + + v2 = np.exp(self.A) * self.mu(Ts) + + self.assertEqual(v1.shape, (50,)) + self.assertEqual(v2.shape, (50,)) + + self.assertTrue(np.allclose(v1.numpy(), v2)) + + def test_value_batch_batch(self): + nbatch = 50 + self.A = torch.linspace(-2.1, -1.0, nbatch) + + Ts = torch.linspace(25, 950, nbatch) + 273.15 + + obj = temperature.ShearModulusScalingExp(self.A, self.mu) + + v1 = obj.value(Ts.unsqueeze(0).expand((4, 50))) + + v2 = np.tile((np.exp(self.A) * self.mu(Ts))[None, ...], (4, 1)) + + self.assertEqual(v1.shape, (4, 50)) + self.assertEqual(v2.shape, (4, 50)) + self.assertTrue(np.allclose(v1.numpy(), v2)) @@ -172,6 +396,33 @@ def test_batch(self): 1.0 - (self.k * Ts / (self.mu(Ts) * self.b**3.0 * g0)) ** (1 / q) ) ** (1 / p) + self.assertEqual(v1.shape, (50,)) + self.assertEqual(v2.shape, (50,)) + + self.assertTrue(np.allclose(v1, v2)) + + def test_batch_batch(self): + nbatch = 50 + + tau0 = torch.linspace(100.0, 1000.0, nbatch) + g0 = torch.linspace(0.5, 1.0, nbatch) + p = torch.linspace(2.0 / 3.0, 1.0, nbatch) + q = 1.0 / 3.0 + + Ts = torch.linspace(25, 950, 50) + 273.15 + + obj = temperature.MTSScaling(tau0, g0, q, p, self.k, self.b, self.mu) + + v1 = obj.value(Ts.unsqueeze(0).expand((6, 50))) + + v2 = tau0 * ( + 1.0 - (self.k * Ts / (self.mu(Ts) * self.b**3.0 * g0)) ** (1 / q) + ) ** (1 / p) + v2 = np.tile(v2[None, ...], (6, 1)) + + self.assertEqual(v1.shape, (6, 50)) + self.assertEqual(v2.shape, (6, 50)) + self.assertTrue(np.allclose(v1, v2)) @@ -215,6 +466,33 @@ def test_value_batch(self): v2 = -mu_values * b**3.0 / (k * Ts * A.numpy()) v2 = np.minimum(v2, 20.0) + self.assertEqual(v1.shape, (50,)) + self.assertEqual(v2.shape, (50,)) + + self.assertTrue(np.allclose(v1.numpy(), v2)) + + def 
test_value_batch_batch(self): + A = torch.linspace(-8.679 - 1, -8.679 + 1, 50) + mu = temperature.PolynomialScaling( + torch.tensor([-1.34689305e-02, -5.18806776e00, 7.86708330e04]) + ) + b = 2.474e-7 + k = 1.38064e-20 + + Ts = torch.linspace(25, 950.0, 50) + 273.15 + + obj = temperature.KMRateSensitivityScaling(A, mu, b, k) + v1 = obj.value(Ts.unsqueeze(0).expand(6, 50)) + + mu_values = np.array([mu.value(T).numpy() for T in Ts]) + + v2 = -mu_values * b**3.0 / (k * Ts * A.numpy()) + v2 = np.minimum(v2, 20.0) + v2 = np.tile(v2[None, ...], (6, 1)) + + self.assertEqual(v1.shape, (6, 50)) + self.assertEqual(v2.shape, (6, 50)) + self.assertTrue(np.allclose(v1.numpy(), v2)) @@ -264,4 +542,37 @@ def test_value_batch(self): * mu_values * eps0 ** (k * Ts.numpy() * A.numpy() / (mu_values * b**3.0)) ) + + self.assertEqual(v1.shape, (50,)) + self.assertEqual(v2.shape, (50,)) + + self.assertTrue(np.allclose(v1, v2)) + + def test_value_batch_batch(self): + A = torch.linspace(-8.679 - 1, -8.679 + 1, 50) + B = torch.linspace(-0.744, -0.80, 50) + mu = temperature.PolynomialScaling( + torch.tensor([-1.34689305e-02, -5.18806776e00, 7.86708330e04]) + ) + b = 2.474e-7 + k = 1.38064e-20 + eps0 = 1e10 + + Ts = torch.linspace(25, 950.0, 50) + 273.15 + mu_values = np.array([mu.value(T).numpy() for T in Ts]) + + obj = temperature.KMViscosityScaling(A, B, mu, eps0, b, k) + + v1 = obj.value(Ts.unsqueeze(0).expand(8, 50)) + v2 = ( + np.exp(B.numpy()) + * mu_values + * eps0 ** (k * Ts.numpy() * A.numpy() / (mu_values * b**3.0)) + ) + + v2 = np.tile(v2[None, ...], (8, 1)) + + self.assertEqual(v1.shape, (8, 50)) + self.assertEqual(v2.shape, (8, 50)) + self.assertTrue(np.allclose(v1, v2)) diff --git a/test/test_utility.py b/test/test_utility.py index 7df25c7..ddb8d11 100644 --- a/test/test_utility.py +++ b/test/test_utility.py @@ -7,6 +7,48 @@ import numpy.random as ra import scipy.interpolate as inter +torch.set_default_tensor_type(torch.DoubleTensor) + + +class TestArbitraryBatchTimeSeriesInterpolator(unittest.TestCase): + def setUp(self): + self.ntime = 100 + self.nbatch = 50 + self.times = np.empty((self.ntime, self.nbatch)) + self.values = np.empty((self.ntime, self.nbatch)) + for i in range(self.nbatch): + tmax = ra.random() + self.times[:, i] = np.linspace(0, tmax, self.ntime) + self.values[:, i] = ra.random((self.ntime,)) + + self.ref_obj = utility.BatchTimeSeriesInterpolator( + torch.tensor(self.times), torch.tensor(self.values) + ) + self.obj = utility.ArbitraryBatchTimeSeriesInterpolator( + torch.tensor(self.times), torch.tensor(self.values) + ) + + def test_interpolate(self): + y = 10 + X = torch.zeros((y, self.nbatch)) + for i in range(self.nbatch): + tmin, tmax = sorted( + list(ra.uniform(0, np.max(self.times[:, i]), size=(2,))) + ) + X[:, i] = torch.linspace(tmin, tmax, y) + + # Answer done one at a time + Y1 = torch.zeros((y, self.nbatch)) + for i in range(y): + Y1[i] = self.ref_obj(X[i]) + + # With the new object + Y2 = self.obj(X) + + print(Y2.shape) + + self.assertTrue(torch.allclose(Y1, Y2)) + class TestInterpolateBatchTimesObject(unittest.TestCase): def setUp(self): @@ -14,7 +56,6 @@ def setUp(self): self.nbatch = 50 self.times = np.empty((self.ntime, self.nbatch)) self.values = np.empty((self.ntime, self.nbatch)) - self.X = np.empty((self.ntime, self.nbatch, 2)) for i in range(self.nbatch): tmax = ra.random() self.times[:, i] = np.linspace(0, tmax, self.ntime) @@ -63,7 +104,6 @@ def setUp(self): self.nbatch = 50 self.times = np.empty((self.ntime, self.nbatch)) self.values = np.empty((self.ntime, 
self.nbatch)) - self.X = np.empty((self.ntime, self.nbatch, 2)) for i in range(self.nbatch): tmax = ra.random() self.times[:, i] = np.linspace(0, tmax, self.ntime) @@ -92,7 +132,6 @@ def setUp(self): self.nbatch = 50 self.times = np.empty((self.ntime, self.nbatch)) self.values = np.empty((self.ntime, self.nbatch)) - self.X = np.empty((self.ntime, self.nbatch, 2)) for i in range(self.nbatch): tmax = ra.random() self.times[:, i] = np.linspace(0, tmax, self.ntime) @@ -144,7 +183,6 @@ def setUp(self): self.nbatch = 50 self.times = np.linspace(0, ra.random(), self.ntime) self.values = np.empty((self.ntime, self.nbatch)) - self.X = np.empty((self.ntime, self.nbatch, 2)) for i in range(self.nbatch): self.values[:, i] = ra.random((self.ntime,))
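# Illustrative sketch (not part of the patch above): most of the new *MultiBatch and
# *BatchBatch cases follow the same pattern: expand every input with an extra leading
# batch dimension and compare the analytic derivative against
# utility.batch_differentiate(..., nbatch_dim=2).  A minimal standalone version of the
# hardening check, with arbitrary parameter values and sizes, might look like this;
# the patch's tests assert that the analytic derivatives broadcast over the extra
# batch dimension exactly as assumed here.

import torch

from pyoptmat import hardening, utility
from pyoptmat.temperature import ConstantParameter as CP

torch.set_default_tensor_type(torch.DoubleTensor)

model = hardening.VoceIsotropicHardeningModel(
    CP(torch.tensor(100.0)), CP(torch.tensor(1.2))
)

nbatch, mbatch = 10, 3
h = torch.linspace(50, 110, nbatch).reshape(nbatch, 1)
h = h.expand((mbatch,) + h.shape)  # extra leading batch dimension, as in the MultiBatch setUps

exact = model.dvalue(h)  # analytic derivative of the hardening value
numer = utility.batch_differentiate(lambda x: model.value(x), h, nbatch_dim=2)  # numerical check
print(torch.allclose(exact, numer, rtol=1.0e-4))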
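# Illustrative sketch (not part of the patch above): TestBatchSimpleChunkTime exercises the
# new block_size argument to ode.odeint, which integrates the batched ODE in blocks of
# several time steps per vectorized solve.  A standalone script mirroring that test, with
# illustrative sizes, might look like:

import numpy as np
import torch

from pyoptmat import ode

torch.set_default_tensor_type(torch.DoubleTensor)

class LogisticODE(torch.nn.Module):
    # dy/dt = r * (1 - y / K) * y, returning the rate and its Jacobian with respect to y
    def __init__(self, r, K):
        super().__init__()
        self.r = r
        self.K = K

    def forward(self, t, y):
        return (
            self.r * (1.0 - y / self.K) * y,
            (self.r - (2 * self.r * y) / self.K)[..., None],
        )

    def exact(self, t, y0):
        return (
            self.K
            * torch.exp(self.r * t)
            * y0[..., 0]
            / (self.K + (torch.exp(self.r * t) - 1) * y0[..., 0])
        )[..., None]

nbatch, nsteps = 20, 100
model = LogisticODE(1.0, 1.0)
y0 = torch.linspace(0.51, 0.76, nbatch).reshape(nbatch, 1)
times = torch.tensor(np.array([np.linspace(0, 7.5, nsteps) for _ in range(nbatch)]).T)

# integrate three time steps per block and compare against the closed-form solution
numerical = ode.odeint(model, y0, times, method="backward-euler", block_size=3)
exact = model.exact(times, y0)
print(torch.max(torch.abs((numerical - exact) / exact)))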