diff --git a/configure.ac b/configure.ac
index 9640ed009..86207c3a3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -272,6 +272,7 @@ AC_CONFIG_FILES([bin/sstcc], [chmod +x bin/sstcc])
 AC_CONFIG_FILES([bin/sstccvars.py])
 AC_CONFIG_FILES([tests/runtest], [chmod +x tests/runtest])
 AC_CONFIG_FILES([tests/checktest], [chmod +x tests/checktest])
+AC_CONFIG_FILES([tests/checkdiff], [chmod +x tests/checkdiff])
 
 AC_OUTPUT
 
diff --git a/sstmac/skeletons/Makefile.am b/sstmac/skeletons/Makefile.am
index ef62ab1a0..3eb3ed946 100644
--- a/sstmac/skeletons/Makefile.am
+++ b/sstmac/skeletons/Makefile.am
@@ -23,6 +23,9 @@ nobase_library_include_HEADERS = \
 libsstmac_skeletons_la_LDFLAGS = 
 
 libsstmac_skeletons_la_SOURCES = \
+  fft/fft.cc \
+  halo3d-26/halo3d-26.cc \
+  sweep3d/sweep3d.cc \
   traffic_matrix/main.cc \
   undumpi/parsedumpi.cc \
   undumpi/parsedumpi_callbacks.cc 
diff --git a/sstmac/skeletons/fft/fft.cc b/sstmac/skeletons/fft/fft.cc
new file mode 100644
index 000000000..69dbe9346
--- /dev/null
+++ b/sstmac/skeletons/fft/fft.cc
@@ -0,0 +1,178 @@
+/**
+Copyright 2009-2020 National Technology and Engineering Solutions of Sandia,
+LLC (NTESS).  Under the terms of Contract DE-NA-0003525, the U.S.  Government
+retains certain rights in this software.
+
+Sandia National Laboratories is a multimission laboratory managed and operated
+by National Technology and Engineering Solutions of Sandia, LLC., a wholly
+owned subsidiary of Honeywell International, Inc., for the U.S. Department of
+Energy's National Nuclear Security Administration under contract DE-NA0003525.
+
+Copyright (c) 2009-2020, NTESS
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * Neither the name of the copyright holder nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Questions? Contact sst-macro-help@sandia.gov
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <cstring>
+#include <sstmac/replacements/mpi.h>
+
+#define MP_X 0  //index of the x axis in dims[] and skip[]
+#define MP_Y 1  //index of the y axis in dims[] and skip[]
+#define MP_Z 2  //index of the z axis in dims[] and skip[]
+//linearized index of grid coordinate (a,b,c), a varying fastest; expands against a `dims` array at the use site
+#define calc_pe(a,b,c)  ((a)+(b)*dims[MP_X]+(c)*dims[MP_X]*dims[MP_Y])
+
+// FFT-style skeleton: ranks form a pex x pey x pez grid and every iteration runs
+// one MPI_Alltoall per axis (among the ranks sharing the other two coordinates),
+// skipping any axis whose -nx/-ny/-nz per-peer count of MPI_CHARs is zero.
+#define sstmac_app_name fft
+int USER_MAIN(int argc, char **argv)
+{
+  int world_rank, numranks;
+  MPI_Init(&argc,&argv);
+  MPI_Comm_rank(MPI_COMM_WORLD,&world_rank);
+  MPI_Comm_size(MPI_COMM_WORLD,&numranks);
+
+  int myrank = world_rank;
+  MPI_Comm comm = MPI_COMM_WORLD;
+  int dims[3] = {0, 0, 0};
+
+  int msg_size_x = 0;
+  int msg_size_y = 0;
+  int msg_size_z = 0;
+  int MAX_ITER = 10;
+  int print = 0;
+
+  for (int i = 0; i + 1 < argc; ++i) { //each option reads argv[i + 1]; a trailing flag with no value is ignored
+    if (strcmp("-pex", argv[i]) == 0) {
+      dims[MP_X] = atoi(argv[i + 1]);
+      i++;
+    } else if (strcmp("-pey", argv[i]) == 0) {
+      dims[MP_Y] = atoi(argv[i + 1]);
+      i++;
+    } else if (strcmp("-pez", argv[i]) == 0) {
+      dims[MP_Z] = atoi(argv[i + 1]);
+      i++;
+    } else if (strcmp("-iterations", argv[i]) == 0) {
+      MAX_ITER = atoi(argv[i + 1]);
+      i++;
+    } else if (strcmp("-nx", argv[i]) == 0) {
+      msg_size_x = atoi(argv[i + 1]);
+      i++;
+    } else if (strcmp("-ny", argv[i]) == 0) {
+      msg_size_y = atoi(argv[i + 1]);
+      i++;
+    } else if (strcmp("-nz", argv[i]) == 0) {
+      msg_size_z = atoi(argv[i + 1]);
+      i++;
+    }  else if (strcmp(argv[i], "-print") == 0){
+      print = atoi(argv[i+1]);
+      ++i;
+    }
+  }
+
+  if(dims[MP_X] * dims[MP_Y] * dims[MP_Z] != numranks) {
+    fprintf(stderr, "\n pex * pey * pez does not equal number of ranks\n");
+    MPI_Abort(MPI_COMM_WORLD, 1);
+  }
+
+  //figure out my coordinates
+  int myXcoord = myrank % dims[MP_X];
+  int myYcoord = (myrank % (dims[MP_X] * dims[MP_Y])) / dims[MP_X];
+  int myZcoord = (myrank % (dims[MP_X] * dims[MP_Y] * dims[MP_Z])) / (dims[MP_X] * dims[MP_Y]);
+
+  bool skip[3];
+  //which a2as to skip
+  skip[MP_X] = msg_size_x == 0;
+  skip[MP_Y] = msg_size_y == 0;
+  skip[MP_Z] = msg_size_z == 0;
+
+  //every alltoall is issued with null send/recv buffers, so no payload storage
+  //is allocated here
+  //NOTE(review): relies on the MPI layer accepting null buffers - confirm
+
+  char *sendbuf = nullptr;
+  char *recvbuf = nullptr;
+
+  //create subcommunicators
+  MPI_Comm X_comm = MPI_COMM_NULL, Y_comm = MPI_COMM_NULL, Z_comm = MPI_COMM_NULL;
+  if(!skip[MP_X]) {
+    MPI_Comm_split(comm, myZcoord * dims[MP_Y] + myYcoord, myXcoord, &X_comm);
+  }
+  if(!skip[MP_Y]) {
+    MPI_Comm_split(comm, myZcoord * dims[MP_X] + myXcoord, myYcoord, &Y_comm);
+  }
+  if(!skip[MP_Z]) {
+    MPI_Comm_split(comm, myYcoord * dims[MP_X] + myXcoord, myZcoord, &Z_comm);
+  }
+
+  double startTime, stopTime;
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  startTime = MPI_Wtime();
+  for (int i = 0; i < MAX_ITER; i++) {
+    double start = MPI_Wtime();
+    if(!skip[MP_X]) {
+      MPI_Alltoall(sendbuf, msg_size_x, MPI_CHAR, recvbuf, msg_size_x, MPI_CHAR, X_comm);
+    }
+
+    if(!skip[MP_Y]) {
+      MPI_Alltoall(sendbuf, msg_size_y, MPI_CHAR, recvbuf, msg_size_y, MPI_CHAR, Y_comm);
+    }
+
+    if(!skip[MP_Z]) {
+      MPI_Alltoall(sendbuf, msg_size_z, MPI_CHAR, recvbuf, msg_size_z, MPI_CHAR, Z_comm);
+    }
+
+    double stop = MPI_Wtime();
+    if (print){
+      printf("Rank %d = [%d,%d,%d] iteration %d: %12.8fs\n", 
+             myrank, myXcoord, myYcoord, myZcoord, i, (stop-start));
+    }
+  }
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  stopTime = MPI_Wtime();
+
+  //finalized summary output
+  if(myrank == 0 && MAX_ITER != 0 && print) {
+    printf("Finished %d iterations\n",MAX_ITER);
+    printf("Time elapsed per iteration for grid size (%d,%d,%d) with message sizes (%d,%d,%d) : %f s\n", 
+    dims[MP_X], dims[MP_Y], dims[MP_Z], msg_size_x, msg_size_y, msg_size_z, (stopTime - startTime)/MAX_ITER);
+  }
+
+  MPI_Finalize();
+  return 0;
+}
+
+
diff --git a/sstmac/skeletons/fft/parameters.ini b/sstmac/skeletons/fft/parameters.ini
new file mode 100644
index 000000000..2c111725d
--- /dev/null
+++ b/sstmac/skeletons/fft/parameters.ini
@@ -0,0 +1,59 @@
+
+node {
+ app1 {
+  indexing = block
+  allocation = first_available
+  name = fft
+  launch_cmd = aprun -n 64 -N 1
+  argv = -pex 4 -pey 4 -pez 4 -nx 3200 -ny 3200 -nz 3200 -iterations 3 -print 1
+ }
+ nic {
+  name = snappr
+  injection {
+   mtu = 1KB
+   bandwidth = 10GB/s
+   latency = 1us
+   credits = 64KB
+  }
+ }
+ memory {
+  name = snappr
+  channel_bandwidth = 10GB/s
+  num_channels = 8
+  mtu = 1MB
+  latency = 15ns
+ }
+ proc {
+  ncores = 4
+  frequency = 2.1Ghz
+ }
+ name = simple
+}
+
+switch {
+ name = snappr
+ arbitrator = fifo
+ mtu = 1KB
+ link {
+  bandwidth = 2.5GB/s
+  latency = 100ns
+  credits = 64KB
+ }
+ logp {
+  bandwidth = 2.5GB/s
+  out_in_latency = 1us
+  hop_latency = 100ns
+ }
+ router {
+  name = torus_minimal
+ }
+}
+
+
+topology {
+ name = torus
+ seed = 14
+ geometry = [4,4,4]
+ redundant = [8,4,8]
+ concentration = 1
+}
diff --git a/sstmac/skeletons/halo3d-26/halo3d-26.cc b/sstmac/skeletons/halo3d-26/halo3d-26.cc
new file mode 100644
index 000000000..fb9f7b8a7
--- /dev/null
+++ b/sstmac/skeletons/halo3d-26/halo3d-26.cc
@@ -0,0 +1,440 @@
+/**
+Copyright 2009-2020 National Technology and Engineering Solutions of Sandia,
+LLC (NTESS).  Under the terms of Contract DE-NA-0003525, the U.S.  Government
+retains certain rights in this software.
+
+Sandia National Laboratories is a multimission laboratory managed and operated
+by National Technology and Engineering Solutions of Sandia, LLC., a wholly
+owned subsidiary of Honeywell International, Inc., for the U.S. Department of
+Energy's National Nuclear Security Administration under contract DE-NA0003525.
+
+Copyright (c) 2009-2020, NTESS
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * Neither the name of the copyright holder nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Questions? Contact sst-macro-help@sandia.gov
+*/
+#include <sstmac/replacements/mpi.h>
+#include <sstmac/replacements/sys/time.h>
+#include <sstmac/replacements/time.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+// Split a rank into (x, y, z) grid coordinates with x varying fastest, then y,
+// then z. pez is part of the signature but is never read.
+void get_position(int rank, int pex, int pey, int pez, int* myX, int* myY, int* myZ) {
+  const int ranksPerPlane = pex * pey;
+  const int inPlane = rank % ranksPerPlane;
+  *myY = inPlane / pex;
+  *myX = inPlane % pex;
+  *myZ = rank / ranksPerPlane;
+}
+
+// Rank of grid cell (myX, myY, myZ) on a pX x pY x pZ grid, x varying fastest.
+// Each coordinate has its extent added before the modulo, so -1 maps to extent-1.
+int convert_position_to_rank(int pX, int pY, int pZ, int myX, int myY, int myZ) {
+  const int wrappedX = (myX + pX) % pX;
+  const int wrappedY = (myY + pY) % pY;
+  const int wrappedZ = (myZ + pZ) % pZ;
+  return wrappedX + pX * (wrappedY + pY * wrappedZ);
+}
+
+// Halo-exchange skeleton on a periodic pex x pey x pez rank grid: each iteration
+// sleeps for -sleep nanoseconds, then posts a non-blocking receive/send pair to
+// every face and edge neighbour and waits for all of them. Buffers are null.
+#define sstmac_app_name halo3d-26
+int USER_MAIN(int argc, char* argv[]) {
+  MPI_Init(&argc, &argv);
+
+  int world_me = -1;
+  int world_size = -1;
+
+  MPI_Comm_rank(MPI_COMM_WORLD, &world_me);
+  MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+  int size = world_size;
+  int me = world_me;
+
+  MPI_Comm halo_comm = MPI_COMM_WORLD;
+
+  int pex = size;
+  int pey = 1;
+  int pez = 1;
+
+  int nx = 10;
+  int ny = 10;
+  int nz = 10;
+
+  int repeats = 100;
+  int vars = 1;
+
+  long sleep = 1000;
+
+  int print = 0;
+
+  for (int i = 1; i < argc; i++) {
+    if (strcmp(argv[i], "-nx") == 0) {
+      if (i + 1 >= argc) { //the value is read from argv[i + 1]
+        if (me == 0) {
+          fprintf(stderr, "Error: specified -nx without a value.\n");
+        }
+        exit(-1);
+      }
+
+      nx = atoi(argv[i + 1]);
+      ++i;
+    } else if (strcmp(argv[i], "-ny") == 0) {
+      if (i + 1 >= argc) {
+        if (me == 0) {
+          fprintf(stderr, "Error: specified -ny without a value.\n");
+        }
+        exit(-1);
+      }
+
+      ny = atoi(argv[i + 1]);
+      ++i;
+    } else if (strcmp(argv[i], "-nz") == 0) {
+      if (i + 1 >= argc) {
+        if (me == 0) {
+          fprintf(stderr, "Error: specified -nz without a value.\n");
+        }
+        exit(-1);
+      }
+
+      nz = atoi(argv[i + 1]);
+      ++i;
+    } else if (strcmp(argv[i], "-pex") == 0) {
+      if (i + 1 >= argc) {
+        if (me == 0) {
+          fprintf(stderr, "Error: specified -pex without a value.\n");
+        }
+        exit(-1);
+      }
+
+      pex = atoi(argv[i + 1]);
+      ++i;
+    } else if (strcmp(argv[i], "-pey") == 0) {
+      if (i + 1 >= argc) {
+        if (me == 0) {
+          fprintf(stderr, "Error: specified -pey without a value.\n");
+        }
+        exit(-1);
+      }
+
+      pey = atoi(argv[i + 1]);
+      ++i;
+    } else if (strcmp(argv[i], "-pez") == 0) {
+      if (i + 1 >= argc) {
+        if (me == 0) {
+          fprintf(stderr, "Error: specified -pez without a value.\n");
+        }
+        exit(-1);
+      }
+
+      pez = atoi(argv[i + 1]);
+      ++i;
+    } else if (strcmp(argv[i], "-iterations") == 0) {
+      if (i + 1 >= argc) {
+        if (me == 0) {
+          fprintf(stderr, "Error: specified -iterations without a value.\n");
+        }
+        exit(-1);
+      }
+
+      repeats = atoi(argv[i + 1]);
+      ++i;
+    } else if (strcmp(argv[i], "-vars") == 0) {
+      if (i + 1 >= argc) {
+        if (me == 0) {
+          fprintf(stderr, "Error: specified -vars without a value.\n");
+        }
+        exit(-1);
+      }
+
+      vars = atoi(argv[i + 1]);
+      ++i;
+    } else if (strcmp(argv[i], "-sleep") == 0) {
+      if (i + 1 >= argc) {
+        if (me == 0) {
+          fprintf(stderr, "Error: specified -sleep without a value.\n");
+        }
+        exit(-1);
+      }
+
+      sleep = atol(argv[i + 1]);
+      ++i;
+    } else if (strcmp(argv[i], "-print") == 0){
+      if (i + 1 >= argc) {
+        if (me == 0) {
+          fprintf(stderr, "Error: specified -print without a value.\n");
+        }
+        exit(-1);
+      }
+
+      print = atoi(argv[i + 1]);
+      ++i;
+    } else {
+      if (0 == me) {
+        fprintf(stderr, "Unknown option: %s\n", argv[i]);
+      }
+      exit(-1);
+    }
+  }
+
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  if ((pex * pey * pez) != size) {
+    fprintf(stderr, "Error: rank grid does not equal number of ranks.\n");
+    fprintf(stderr, "%7d x %7d x %7d != %7d\n", pex, pey, pez, size);
+    MPI_Abort(MPI_COMM_WORLD, 1);
+  }
+
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  if (me == 0 && print) {
+    printf("# MPI Nearest Neighbor Communication\n");
+    printf("# Info:\n");
+    printf("# Processor Grid:         %7d x %7d x %7d\n", pex, pey, pez);
+    printf("# Data Grid (per rank):   %7d x %7d x %7d\n", nx, ny, nz);
+    printf("# Iterations:             %7d\n", repeats);
+    printf("# Variables:              %7d\n", vars);
+    printf("# Sleep:                  %7ld\n", sleep);
+  }
+
+  int posX, posY, posZ;
+  get_position(me, pex, pey, pez, &posX, &posY, &posZ);
+
+  const int xFaceUp =
+      convert_position_to_rank(pex, pey, pez, posX + 1, posY, posZ);
+  const int xFaceDown =
+      convert_position_to_rank(pex, pey, pez, posX - 1, posY, posZ);
+  const int yFaceUp =
+      convert_position_to_rank(pex, pey, pez, posX, posY + 1, posZ);
+  const int yFaceDown =
+      convert_position_to_rank(pex, pey, pez, posX, posY - 1, posZ);
+  const int zFaceUp =
+      convert_position_to_rank(pex, pey, pez, posX, posY, posZ + 1);
+  const int zFaceDown =
+      convert_position_to_rank(pex, pey, pez, posX, posY, posZ - 1);
+  //NOTE(review): vertexA-H are computed but no message below targets them - confirm intent
+  const int vertexA =
+      convert_position_to_rank(pex, pey, pez, posX - 1, posY - 1, posZ - 1);
+  const int vertexB =
+      convert_position_to_rank(pex, pey, pez, posX - 1, posY - 1, posZ + 1);
+  const int vertexC =
+      convert_position_to_rank(pex, pey, pez, posX - 1, posY + 1, posZ - 1);
+  const int vertexD =
+      convert_position_to_rank(pex, pey, pez, posX - 1, posY + 1, posZ + 1);
+  const int vertexE =
+      convert_position_to_rank(pex, pey, pez, posX + 1, posY - 1, posZ - 1);
+  const int vertexF =
+      convert_position_to_rank(pex, pey, pez, posX + 1, posY - 1, posZ + 1);
+  const int vertexG =
+      convert_position_to_rank(pex, pey, pez, posX + 1, posY + 1, posZ - 1);
+  const int vertexH =
+      convert_position_to_rank(pex, pey, pez, posX + 1, posY + 1, posZ + 1);
+
+  const int edgeA =
+      convert_position_to_rank(pex, pey, pez, posX - 1, posY - 1, posZ);
+  const int edgeB =
+      convert_position_to_rank(pex, pey, pez, posX, posY - 1, posZ - 1);
+  const int edgeC =
+      convert_position_to_rank(pex, pey, pez, posX + 1, posY - 1, posZ);
+  const int edgeD =
+      convert_position_to_rank(pex, pey, pez, posX, posY - 1, posZ + 1);
+  const int edgeE =
+      convert_position_to_rank(pex, pey, pez, posX - 1, posY, posZ + 1);
+  const int edgeF =
+      convert_position_to_rank(pex, pey, pez, posX + 1, posY, posZ + 1);
+  const int edgeG =
+      convert_position_to_rank(pex, pey, pez, posX - 1, posY, posZ - 1);
+  const int edgeH =
+      convert_position_to_rank(pex, pey, pez, posX + 1, posY, posZ - 1);
+  const int edgeI =
+      convert_position_to_rank(pex, pey, pez, posX - 1, posY + 1, posZ);
+  const int edgeJ =
+      convert_position_to_rank(pex, pey, pez, posX, posY + 1, posZ + 1);
+  const int edgeK =
+      convert_position_to_rank(pex, pey, pez, posX + 1, posY + 1, posZ);
+  const int edgeL =
+      convert_position_to_rank(pex, pey, pez, posX, posY + 1, posZ - 1);
+  //each iteration posts 36 requests (6 faces + 12 edges, one recv and one send each); 52 slots are reserved
+  int requestcount = 0;
+  MPI_Status* status;
+  status = (MPI_Status*)malloc(sizeof(MPI_Status) * 52);
+
+  MPI_Request* requests;
+  requests = (MPI_Request*)malloc(sizeof(MPI_Request) * 52);
+
+  double* sendBuffer = nullptr;
+  double* recvBuffer = nullptr;
+
+  struct timeval start;
+  struct timeval end;
+
+  struct timespec sleepTS;
+  sleepTS.tv_sec = 0;
+  sleepTS.tv_nsec = sleep;
+
+  struct timespec remainTS;
+
+  gettimeofday(&start, NULL);
+
+  for (int i = 0; i < repeats; ++i) {
+    requestcount = 0;
+    struct timeval iter_start;
+    struct timeval iter_end;
+    gettimeofday(&iter_start, NULL);
+    //an interrupted nanosleep returns -1 with errno == EINTR; resume with the time left
+    if (nanosleep(&sleepTS, &remainTS) == -1 && errno == EINTR) {
+      while (nanosleep(&remainTS, &remainTS) == -1 && errno == EINTR)
+        ;
+    }
+
+    MPI_Irecv(recvBuffer, ny * nz * vars, MPI_DOUBLE, xFaceUp, 1000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, ny * nz * vars, MPI_DOUBLE, xFaceUp, 1000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, ny * nz * vars, MPI_DOUBLE, xFaceDown,
+              1000, halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, ny * nz * vars, MPI_DOUBLE, xFaceDown,
+              1000, halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, nx * nz * vars, MPI_DOUBLE, yFaceUp, 2000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, nx * nz * vars, MPI_DOUBLE, yFaceUp, 2000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, nx * nz * vars, MPI_DOUBLE, yFaceDown,
+              2000, halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, nx * nz * vars, MPI_DOUBLE, yFaceDown,
+              2000, halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, nx * ny * vars, MPI_DOUBLE, zFaceUp, 4000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, nx * ny * vars, MPI_DOUBLE, zFaceUp, 4000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, nx * ny * vars, MPI_DOUBLE, zFaceDown,
+              4000, halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, nx * ny * vars, MPI_DOUBLE, zFaceDown,
+              4000, halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, nz * vars, MPI_DOUBLE, edgeA, 8000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, nz * vars, MPI_DOUBLE, edgeA, 8000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, nx * vars, MPI_DOUBLE, edgeB, 8000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, nx * vars, MPI_DOUBLE, edgeB, 8000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, nz * vars, MPI_DOUBLE, edgeC, 8000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, nz * vars, MPI_DOUBLE, edgeC, 8000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, nx * vars, MPI_DOUBLE, edgeD, 8000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, nx * vars, MPI_DOUBLE, edgeD, 8000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, ny * vars, MPI_DOUBLE, edgeE, 8000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, ny * vars, MPI_DOUBLE, edgeE, 8000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, ny * vars, MPI_DOUBLE, edgeF, 8000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, ny * vars, MPI_DOUBLE, edgeF, 8000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, ny * vars, MPI_DOUBLE, edgeG, 8000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, ny * vars, MPI_DOUBLE, edgeG, 8000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, ny * vars, MPI_DOUBLE, edgeH, 8000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, ny * vars, MPI_DOUBLE, edgeH, 8000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, nz * vars, MPI_DOUBLE, edgeI, 8000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, nz * vars, MPI_DOUBLE, edgeI, 8000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, nx * vars, MPI_DOUBLE, edgeJ, 8000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, nx * vars, MPI_DOUBLE, edgeJ, 8000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, nz * vars, MPI_DOUBLE, edgeK, 8000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, nz * vars, MPI_DOUBLE, edgeK, 8000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Irecv(recvBuffer, nx * vars, MPI_DOUBLE, edgeL, 8000,
+              halo_comm, &requests[requestcount++]);
+    MPI_Isend(sendBuffer, nx * vars, MPI_DOUBLE, edgeL, 8000,
+              halo_comm, &requests[requestcount++]);
+
+    MPI_Waitall(requestcount, requests, status);
+    gettimeofday(&iter_end, NULL);
+    const double timeTaken = (iter_end.tv_sec-iter_start.tv_sec) + (iter_end.tv_usec-iter_start.tv_usec)*1e-6;
+    if (print){
+      printf("Rank %d = [%d,%d,%d] iteration %d: %12.8fs\n", me, posX, posY, posZ, i, timeTaken);
+    }
+  }
+
+  gettimeofday(&end, NULL);
+
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  if (convert_position_to_rank(pex, pey, pez, pex / 2, pey / 2, pez / 2) ==
+      me) {
+
+    if (print){
+      printf("# Results from rank: %d\n", me);
+
+      const double timeTaken =
+          (((double)end.tv_sec) + ((double)end.tv_usec) * 1.0e-6) -
+          (((double)start.tv_sec) + ((double)start.tv_usec) * 1.0e-6);
+
+      printf("Total time = %20.6f\n", timeTaken);
+    }
+  }
+
+  free(status);
+  free(requests);
+  MPI_Finalize();
+  return 0;
+}
diff --git a/sstmac/skeletons/halo3d-26/parameters.ini b/sstmac/skeletons/halo3d-26/parameters.ini
new file mode 100644
index 000000000..1d0e6101b
--- /dev/null
+++ b/sstmac/skeletons/halo3d-26/parameters.ini
@@ -0,0 +1,59 @@
+
+node {
+ app1 {
+  indexing = block
+  allocation = first_available
+  name = halo3d-26
+  launch_cmd = aprun -n 64 -N 1
+  argv = -pex 4 -pey 4 -pez 4 -nx 100 -ny 100 -nz 100 -iterations 3 -print 1
+ }
+ nic {
+  name = snappr
+  injection {
+   mtu = 1KB
+   bandwidth = 10GB/s
+   latency = 1us
+   credits = 64KB
+  }
+ }
+ memory {
+  name = snappr
+  channel_bandwidth = 10GB/s
+  num_channels = 8
+  mtu = 1MB
+  latency = 15ns
+ }
+ proc {
+  ncores = 4
+  frequency = 2.1Ghz
+ }
+ name = simple
+}
+
+switch {
+ name = snappr
+ arbitrator = fifo
+ mtu = 1KB
+ link {
+  bandwidth = 2.5GB/s
+  latency = 100ns
+  credits = 64KB
+ }
+ logp {
+  bandwidth = 2.5GB/s
+  out_in_latency = 1us
+  hop_latency = 100ns
+ }
+ router {
+  name = torus_minimal
+ }
+}
+
+
+topology {
+ name = torus
+ seed = 14
+ geometry = [4,4,4]
+ redundant = [8,4,8]
+ concentration = 1
+}
diff --git a/sstmac/skeletons/sweep3d/parameters.ini b/sstmac/skeletons/sweep3d/parameters.ini
new file mode 100644
index 000000000..0aa03e934
--- /dev/null
+++ b/sstmac/skeletons/sweep3d/parameters.ini
@@ -0,0 +1,59 @@
+
+node {
+ app1 {
+  indexing = block
+  allocation = first_available
+  name = sweep3d
+  launch_cmd = aprun -n 64 -N 1
+  argv = -pex 8 -pey 8 -nx 64 -ny 64 -nz 100 -kba 10 -vars 10 -iterations 2 -print 1
+ }
+ nic {
+  name = snappr
+  injection {
+   mtu = 1KB
+   bandwidth = 10GB/s
+   latency = 1us
+   credits = 64KB
+  }
+ }
+ memory {
+  name = snappr
+  channel_bandwidth = 10GB/s
+  num_channels = 8
+  mtu = 1MB
+  latency = 15ns
+ }
+ proc {
+  ncores = 4
+  frequency = 2.1Ghz
+ }
+ name = simple
+}
+
+switch {
+ name = snappr
+ arbitrator = fifo
+ mtu = 1KB
+ link {
+  bandwidth = 2.5GB/s
+  latency = 100ns
+  credits = 64KB
+ }
+ logp {
+  bandwidth = 2.5GB/s
+  out_in_latency = 1us
+  hop_latency = 100ns
+ }
+ router {
+  name = torus_minimal
+ }
+}
+
+
+topology {
+ name = torus
+ seed = 14
+ geometry = [4,4,4]
+ redundant = [8,4,8]
+ concentration = 1
+}
diff --git a/sstmac/skeletons/sweep3d/sweep3d.cc b/sstmac/skeletons/sweep3d/sweep3d.cc
new file mode 100644
index 000000000..a4dff893c
--- /dev/null
+++ b/sstmac/skeletons/sweep3d/sweep3d.cc
@@ -0,0 +1,323 @@
+/**
+Copyright 2009-2020 National Technology and Engineering Solutions of Sandia,
+LLC (NTESS).  Under the terms of Contract DE-NA-0003525, the U.S.  Government
+retains certain rights in this software.
+
+Sandia National Laboratories is a multimission laboratory managed and operated
+by National Technology and Engineering Solutions of Sandia, LLC., a wholly
+owned subsidiary of Honeywell International, Inc., for the U.S. Department of
+Energy's National Nuclear Security Administration under contract DE-NA0003525.
+
+Copyright (c) 2009-2020, NTESS
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * Neither the name of the copyright holder nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Questions? Contact sst-macro-help@sandia.gov
+*/
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sstmac/replacements/mpi.h>
+#include <sstmac/replacements/sys/time.h>
+#include <sstmac/replacements/time.h>
+
+// Row-major rank -> (x, y) on a grid that is pex ranks wide; pey is not consulted.
+void get_position(const int rank, const int pex, const int pey, int* myX, int* myY) {
+  *myX = rank % pex;  // column within the row
+  *myY = rank / pex;  // row index
+}
+
+// Stand-in for one block of computation: sleeps for `sleep` nanoseconds.
+// nanosleep() reports interruption by returning -1 with errno == EINTR (it never
+// returns EINTR itself), so the sleep is resumed with the time that was left.
+void compute(long sleep) {
+  struct timespec sleepTS;
+  sleepTS.tv_sec = 0;
+  sleepTS.tv_nsec = sleep;
+  struct timespec remainTS;
+  while (nanosleep(&sleepTS, &remainTS) == -1 && errno == EINTR) {
+    sleepTS = remainTS;
+  }
+}
+
+#define sstmac_app_name sweep3d
+int USER_MAIN(int argc, char* argv[]) 
+{
+  MPI_Init(&argc, &argv);
+
+  int rank = -1;
+  int size = -1;
+
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  MPI_Comm sweep_comm = MPI_COMM_WORLD;
+
+  int pex = -1;
+  int pey = -1;
+  int nx = 50;
+  int ny = 50;
+  int nz = 100;
+  int kba = 10;
+  int repeats = 1;
+
+  int vars = 1;
+  long sleep = 1000;
+  int print = 0;
+
+  for (int i = 0; i < argc; ++i) {
+    if (strcmp("-pex", argv[i]) == 0) {
+      pex = atoi(argv[i + 1]);
+      i++;
+    } else if (strcmp("-pey", argv[i]) == 0) {
+      pey = atoi(argv[i + 1]);
+      i++;
+    } else if (strcmp("-iterations", argv[i]) == 0) {
+      repeats = atoi(argv[i + 1]);
+      i++;
+    } else if (strcmp("-nx", argv[i]) == 0) {
+      nx = atoi(argv[i + 1]);
+      i++;
+    } else if (strcmp("-ny", argv[i]) == 0) {
+      ny = atoi(argv[i + 1]);
+      i++;
+    } else if (strcmp("-nz", argv[i]) == 0) {
+      nz = atoi(argv[i + 1]);
+      i++;
+    } else if (strcmp("-sleep", argv[i]) == 0) {
+      sleep = atol(argv[i + 1]);
+      i++;
+    } else if (strcmp("-vars", argv[i]) == 0) {
+      vars = atoi(argv[i + 1]);
+      i++;
+    } else if (strcmp("-kba", argv[i]) == 0) {
+      kba = atoi(argv[i + 1]);
+      i++;
+    }  else if (strcmp(argv[i], "-print") == 0){
+      print = atoi(argv[i + 1]);
+      i++;
+    }
+  }
+
+  if (kba == 0) {
+    if (rank == 0) {
+      fprintf(stderr,
+              "K-Blocking Factor must not be zero. Please specify -kba <value "
+              "> 0>\n");
+    }
+    MPI_Barrier(MPI_COMM_WORLD); //needed to force correct printing
+    exit(-1);
+  }
+
+  if (nz % kba != 0) {
+    if (rank == 0) {
+      fprintf(stderr,
+              "KBA must evenly divide NZ, KBA=%d, NZ=%d, remainder=%d (must be "
+              "zero)\n",
+              kba, nz, (nz % kba));
+    }
+    MPI_Barrier(MPI_COMM_WORLD); //needed to force correct printing
+    exit(-1);
+  }
+
+  if ((pex * pey) != size) {
+    if (0 == rank) {
+      fprintf(
+          stderr,
+          "Error: processor decomposition (%d x %d) != number of ranks (%d)\n",
+          pex, pey, size);
+    }
+    MPI_Barrier(MPI_COMM_WORLD); //needed to force correct printing
+    exit(-1);
+  }
+
+  if (rank == 0 && print) {
+    printf("# Sweep3D Communication Pattern\n");
+    printf("# Info:\n");
+    printf("# Px:              %8d\n", pex);
+    printf("# Py:              %8d\n", pey);
+    printf("# Nx x Ny x Nz:    %8d x %8d x %8d\n", nx, ny, nz);
+    printf("# KBA:             %8d\n", kba);
+    printf("# Variables:       %8d\n", vars);
+    printf("# Iterations:      %8d\n", repeats);
+  }
+
+  int myX = -1;
+  int myY = -1;
+
+  get_position(rank, pex, pey, &myX, &myY);
+
+  const int xUp = (myX != (pex - 1)) ? rank + 1 : -1;
+  const int xDown = (myX != 0) ? rank - 1 : -1;
+
+  const int yUp = (myY != (pey - 1)) ? rank + pex : -1;
+  const int yDown = (myY != 0) ? rank - pex : -1;
+
+  MPI_Status status;
+
+  double* xRecvBuffer = nullptr;
+  double* xSendBuffer = nullptr;
+
+  double* yRecvBuffer = nullptr;
+  double* ySendBuffer = nullptr;
+
+  struct timeval start;
+  struct timeval end;
+
+  gettimeofday(&start, NULL);
+
+  // We repeat this sequence twice because there are really 8 vertices in the 3D
+  // data domain and we sweep from each of them, processing the top four first
+  // and then the bottom four vertices next.
+  for (int i = 0; i < (repeats * 2); ++i) {
+    // Recreate communication pattern of sweep from (0,0) towards (Px,Py)
+    struct timeval iter_start;
+    struct timeval iter_end;
+    gettimeofday(&iter_start, NULL);
+    for (int k = 0; k < nz; k += kba) {
+      if (xDown > -1) {
+        MPI_Recv(xRecvBuffer, (nx * kba * vars), MPI_DOUBLE, xDown, 1000,
+                 sweep_comm, &status);
+      }
+
+      if (yDown > -1) {
+        MPI_Recv(yRecvBuffer, (ny * kba * vars), MPI_DOUBLE, yDown, 1000,
+                 sweep_comm, &status);
+      }
+
+      compute(sleep);
+
+      if (xUp > -1) {
+        MPI_Send(xSendBuffer, (nx * kba * vars), MPI_DOUBLE, xUp, 1000,
+                 sweep_comm);
+      }
+
+      if (yUp > -1) {
+        MPI_Send(ySendBuffer, (nx * kba * vars), MPI_DOUBLE, yUp, 1000,
+                 sweep_comm);
+      }
+    }
+
+    // Recreate communication pattern of sweep from (Px,0) towards (0,Py)
+    for (int k = 0; k < nz; k += kba) {
+      if (xUp > -1) {
+        MPI_Recv(xRecvBuffer, (nx * kba * vars), MPI_DOUBLE, xUp, 2000,
+                 sweep_comm, &status);
+      }
+
+      if (yDown > -1) {
+        MPI_Recv(yRecvBuffer, (ny * kba * vars), MPI_DOUBLE, yDown, 2000,
+                 sweep_comm, &status);
+      }
+
+      compute(sleep);
+
+      if (xDown > -1) {
+        MPI_Send(xSendBuffer, (nx * kba * vars), MPI_DOUBLE, xDown, 2000,
+                 sweep_comm);
+      }
+
+      if (yUp > -1) {
+        MPI_Send(ySendBuffer, (nx * kba * vars), MPI_DOUBLE, yUp, 2000,
+                 sweep_comm);
+      }
+    }
+
+    // Recreate communication pattern of sweep from (Px,Py) towards (0,0)
+    for (int k = 0; k < nz; k += kba) {
+      if (xUp > -1) {
+        MPI_Recv(xRecvBuffer, (nx * kba * vars), MPI_DOUBLE, xUp, 3000,
+                 sweep_comm, &status);
+      }
+
+      if (yUp > -1) {
+        MPI_Recv(yRecvBuffer, (ny * kba * vars), MPI_DOUBLE, yUp, 3000,
+                 sweep_comm, &status);
+      }
+
+      compute(sleep);
+
+      if (xDown > -1) {
+        MPI_Send(xSendBuffer, (nx * kba * vars), MPI_DOUBLE, xDown, 3000,
+                 sweep_comm);
+      }
+
+      if (yDown > -1) {
+        MPI_Send(ySendBuffer, (nx * kba * vars), MPI_DOUBLE, yDown, 3000,
+                 sweep_comm);
+      }
+    }
+
+    // Recreate communication pattern of sweep from (0,Py) towards (Px,0)
+    for (int k = 0; k < nz; k += kba) {
+      if (xDown > -1) {
+        MPI_Recv(xRecvBuffer, (nx * kba * vars), MPI_DOUBLE, xDown, 4000,
+                 sweep_comm, &status);
+      }
+
+      if (yUp > -1) {
+        MPI_Recv(yRecvBuffer, (ny * kba * vars), MPI_DOUBLE, yUp, 4000,
+                 sweep_comm, &status);
+      }
+
+      compute(sleep);
+
+      if (xUp > -1) {
+        MPI_Send(xSendBuffer, (nx * kba * vars), MPI_DOUBLE, xUp, 4000,
+                 sweep_comm);
+      }
+
+      if (yDown > -1) {
+        MPI_Send(ySendBuffer, (nx * kba * vars), MPI_DOUBLE, yDown, 4000,
+                 sweep_comm);
+      }
+    }
+    gettimeofday(&iter_end, NULL);
+    const double timeTaken = (iter_end.tv_sec-iter_start.tv_sec) + (iter_end.tv_usec-iter_start.tv_usec)*1e-6;
+    if (print){
+      printf("Rank %d = [%d,%d] iteration %d: %12.8fs\n", rank, myX, myY, i, timeTaken);
+    }
+  }
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  gettimeofday(&end, NULL);
+
+  const double timeTaken =
+      (((double)end.tv_sec) + ((double)end.tv_usec) * 1.0e-6) -
+      (((double)start.tv_sec) + ((double)start.tv_usec) * 1.0e-6);
+
+  if (rank == 0){
+    if (print){
+      printf("Total time = %20.6f\n", timeTaken);
+    }
+  }
+  MPI_Finalize();
+  return 0;
+}
diff --git a/tests/Makefile.clang_tests b/tests/Makefile.clang_tests
index 17606ad5c..9461f4404 100644
--- a/tests/Makefile.clang_tests
+++ b/tests/Makefile.clang_tests
@@ -67,7 +67,7 @@ CLANGTEMP=$(CLANGTESTS:%=test_clang_%.tmp-out)
 .PRECIOUS: $(CLANGTEMP)
 
 test_clang_%.$(CHKSUF): test_clang_%.tmp-out
-	$(top_srcdir)/tests/checkdiff $< $(top_srcdir)
+	$(top_builddir)/tests/checkdiff $< $(top_srcdir)
 
 test_clang_%_cpp.tmp-out: sst.pp.%.cc $(SSTMAC_DEGLOBAL) 
 	-$(CXX) -std=c++11 -c $< -o tmp.o \
diff --git a/tests/Makefile.core_tests b/tests/Makefile.core_tests
index fb3a41011..0eb70c64c 100644
--- a/tests/Makefile.core_tests
+++ b/tests/Makefile.core_tests
@@ -5,6 +5,9 @@
 
 CORETESTS+= \
   test_sumi_collective \
+  test_core_apps_fft \
+  test_core_apps_halo3d \
+  test_core_apps_sweep3d \
   test_core_apps_ping_pong_snappr \
   test_core_apps_ping_pong_mem_thrash \
   test_core_apps_ping_all_dfly_snappr \
diff --git a/tests/checkdiff b/tests/checkdiff.in
similarity index 95%
rename from tests/checkdiff
rename to tests/checkdiff.in
index 7b8fe9718..38a3f3be2 100755
--- a/tests/checkdiff
+++ b/tests/checkdiff.in
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! @pyexe@
 
 import sys
 import signal
@@ -6,6 +6,8 @@ import time
 import os
 import subprocess
 import re
+
+sys.path.append("@abs_top_srcdir@/bin")
 from configlib import getstatusoutput
 
 class bcolors:
diff --git a/tests/reference/test_core_apps_fft.ref-out b/tests/reference/test_core_apps_fft.ref-out
new file mode 100644
index 000000000..0c379e318
--- /dev/null
+++ b/tests/reference/test_core_apps_fft.ref-out
@@ -0,0 +1 @@
+Estimated total runtime of           0.00007262 seconds
diff --git a/tests/reference/test_core_apps_halo3d.ref-out b/tests/reference/test_core_apps_halo3d.ref-out
new file mode 100644
index 000000000..2ba4979b5
--- /dev/null
+++ b/tests/reference/test_core_apps_halo3d.ref-out
@@ -0,0 +1 @@
+Estimated total runtime of           0.00070607 seconds
diff --git a/tests/reference/test_core_apps_sweep3d.ref-out b/tests/reference/test_core_apps_sweep3d.ref-out
new file mode 100644
index 000000000..e4f98a449
--- /dev/null
+++ b/tests/reference/test_core_apps_sweep3d.ref-out
@@ -0,0 +1 @@
+Estimated total runtime of           0.00254201 seconds
diff --git a/tests/test_configs/test_fft.ini b/tests/test_configs/test_fft.ini
new file mode 100644
index 000000000..07978eb02
--- /dev/null
+++ b/tests/test_configs/test_fft.ini
@@ -0,0 +1,59 @@
+
+node {
+ app1 {
+  indexing = block
+  allocation = first_available
+  name = fft
+  launch_cmd = aprun -n 64 -N 1
+  argv = -pex 4 -pey 4 -pez 4 -nx 100 -ny 100 -nz 100 -iterations 2
+ }
+ nic {
+  name = snappr
+  injection {
+   mtu = 1KB
+   bandwidth = 10GB/s
+   latency = 1us
+   credits = 64KB
+  }
+ }
+ memory {
+  name = snappr
+  channel_bandwidth = 10GB/s
+  num_channels = 8
+  mtu = 1MB
+  latency = 15ns
+ }
+ proc {
+  ncores = 4
+  frequency = 2.1Ghz
+ }
+ name = simple
+}
+
+switch {
+ name = snappr
+ arbitrator = fifo
+ mtu = 1KB
+ link {
+  bandwidth = 2.5GB/s
+  latency = 100ns
+  credits = 64KB
+ }
+ logp {
+  bandwidth = 2.5GB/s
+  out_in_latency = 1us
+  hop_latency = 100ns
+ }
+ router {
+  name = torus_minimal
+ }
+}
+
+
+topology {
+ name = torus
+ seed = 14
+ geometry = [4,4,4]
+ redundant = [8,4,8]
+ concentration = 1
+}
diff --git a/tests/test_configs/test_halo3d.ini b/tests/test_configs/test_halo3d.ini
new file mode 100644
index 000000000..aca89db66
--- /dev/null
+++ b/tests/test_configs/test_halo3d.ini
@@ -0,0 +1,59 @@
+
+node {
+ app1 {
+  indexing = block
+  allocation = first_available
+  name = halo3d-26
+  launch_cmd = aprun -n 64 -N 1
+  argv = -pex 4 -pey 4 -pez 4 -nx 100 -ny 100 -nz 100 -iterations 3
+ }
+ nic {
+  name = snappr
+  injection {
+   mtu = 1KB
+   bandwidth = 10GB/s
+   latency = 1us
+   credits = 64KB
+  }
+ }
+ memory {
+  name = snappr
+  channel_bandwidth = 10GB/s
+  num_channels = 8
+  mtu = 1MB
+  latency = 15ns
+ }
+ proc {
+  ncores = 4
+  frequency = 2.1Ghz
+ }
+ name = simple
+}
+
+switch {
+ name = snappr
+ arbitrator = fifo
+ mtu = 1KB
+ link {
+  bandwidth = 2.5GB/s
+  latency = 100ns
+  credits = 64KB
+ }
+ logp {
+  bandwidth = 2.5GB/s
+  out_in_latency = 1us
+  hop_latency = 100ns
+ }
+ router {
+  name = torus_minimal
+ }
+}
+
+
+topology {
+ name = torus
+ seed = 14
+ geometry = [4,4,4]
+ redundant = [8,4,8]
+ concentration = 1
+}
diff --git a/tests/test_configs/test_sweep3d.ini b/tests/test_configs/test_sweep3d.ini
new file mode 100644
index 000000000..99cd3783b
--- /dev/null
+++ b/tests/test_configs/test_sweep3d.ini
@@ -0,0 +1,59 @@
+
+node {
+ app1 {
+  indexing = block
+  allocation = first_available
+  name = sweep3d
+  launch_cmd = aprun -n 64 -N 1
+  argv = -pex 8 -pey 8 -nx 32 -ny 32 -nz 20 -kba 10 -vars 10 -iterations 1 -print 0
+ }
+ nic {
+  name = snappr
+  injection {
+   mtu = 1KB
+   bandwidth = 10GB/s
+   latency = 1us
+   credits = 64KB
+  }
+ }
+ memory {
+  name = snappr
+  channel_bandwidth = 10GB/s
+  num_channels = 8
+  mtu = 1MB
+  latency = 15ns
+ }
+ proc {
+  ncores = 4
+  frequency = 2.1Ghz
+ }
+ name = simple
+}
+
+switch {
+ name = snappr
+ arbitrator = fifo
+ mtu = 1KB
+ link {
+  bandwidth = 2.5GB/s
+  latency = 100ns
+  credits = 64KB
+ }
+ logp {
+  bandwidth = 2.5GB/s
+  out_in_latency = 1us
+  hop_latency = 100ns
+ }
+ router {
+  name = torus_minimal
+ }
+}
+
+
+topology {
+ name = torus
+ seed = 14
+ geometry = [4,4,4]
+ redundant = [8,4,8]
+ concentration = 1
+}