From 5aaf68a248fd99883f8b4c02a22fc7e67eb4ca32 Mon Sep 17 00:00:00 2001
From: Michael Bentley <bentley8@llnl.gov>
Date: Wed, 5 Jul 2017 13:29:08 -0700
Subject: [PATCH 1/5] Makefile.in: regenerate Makefile when flit is updated

---
 data/Makefile.in               | 27 ++++++++++++++++-----------
 scripts/flitcli/flit_update.py |  3 ++-
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/data/Makefile.in b/data/Makefile.in
index d4afe49a..b06f2e6d 100644
--- a/data/Makefile.in
+++ b/data/Makefile.in
@@ -10,7 +10,16 @@ UNAME_S         := $(shell uname -s)
 
 FLIT_INC_DIR    := {flit_include_dir}
 FLIT_LIB_DIR    := {flit_lib_dir}
-FLIT_SCRIPT     := {flit_script}
+FLIT_DATA_DIR   := {flit_data_dir}
+FLIT_SCRIPT_DIR := {flit_script_dir}
+
+DEV_CC          ?= {dev_compiler}
+DEV_OPTL        ?= {dev_optl}
+DEV_SWITCHES    ?= {dev_switches}
+
+GT_CC           := {ground_truth_compiler}
+GT_OPTL         := {ground_truth_optl}
+GT_SWITCHES     := {ground_truth_switches}
 
 CC_REQUIRED     += $(FFLAGS)
 CC_REQUIRED     += -std=c++11
@@ -23,14 +32,6 @@ DEV_CFLAGS      += -Wextra
 DEV_CFLAGS      += -Wuninitialized
 DEV_CFLAGS      += -Wno-shift-count-overflow
 
-DEV_CC          ?= {dev_compiler}
-DEV_OPTL        ?= {dev_optl}
-DEV_SWITCHES    ?= {dev_switches}
-
-GT_CC           := {ground_truth_compiler}
-GT_OPTL         := {ground_truth_optl}
-GT_SWITCHES     := {ground_truth_switches}
-
 LD_REQUIRED     += -lm
 LD_REQUIRED     += -lstdc++
 ifeq ($(UNAME_S),Darwin) # If we are on a Mac OSX system
@@ -351,8 +352,12 @@ distclean: clean
 .PRECIOUS: %.d
 -include $(SOURCE:%.cpp=%.d) $(DEV_DEPS) $(GT_DEPS)
 
-Makefile: flit-config.toml $(dir $(FLIT_SCRIPT))/flit_update.py
-	$(FLIT_SCRIPT) update
+Makefile: flit-config.toml
+Makefile: $(FLIT_DATA_DIR)/Makefile.in
+Makefile: $(FLIT_SCRIPT_DIR)/flitconfig.py
+Makefile: $(FLIT_SCRIPT_DIR)/flitutil.py
+Makefile: $(FLIT_SCRIPT_DIR)/flit_update.py
+	$(FLIT_SCRIPT_DIR)/flit.py update
 
 # We have a different solution if we are on a mac
 ifeq ($(UNAME_S),Darwin)
diff --git a/scripts/flitcli/flit_update.py b/scripts/flitcli/flit_update.py
index 53e324b0..cd897537 100644
--- a/scripts/flitcli/flit_update.py
+++ b/scripts/flitcli/flit_update.py
@@ -81,7 +81,8 @@ def main(arguments, prog=sys.argv[0]):
             'ground_truth_switches': gt_switches,
             'flit_include_dir': conf.include_dir,
             'flit_lib_dir': conf.lib_dir,
-            'flit_script': os.path.join(conf.script_dir, 'flit.py'),
+            'flit_data_dir': conf.data_dir,
+            'flit_script_dir': conf.script_dir,
         },
         overwrite=True)
 

From 51eb11385f940e30f0f3bc2fe2420b3be40b5f50 Mon Sep 17 00:00:00 2001
From: Michael Bentley <bentley8@llnl.gov>
Date: Wed, 5 Jul 2017 17:58:45 -0700
Subject: [PATCH 2/5] Makefile.in: correctly build files added to SOURCE in custom.mk

---
 data/Makefile.in | 214 ++++++++++++++++++++++++++---------------------
 data/custom.mk   |   2 +-
 2 files changed, 121 insertions(+), 95 deletions(-)

diff --git a/data/Makefile.in b/data/Makefile.in
index b06f2e6d..0c73daa5 100644
--- a/data/Makefile.in
+++ b/data/Makefile.in
@@ -21,6 +21,12 @@ GT_CC           := {ground_truth_compiler}
 GT_OPTL         := {ground_truth_optl}
 GT_SWITCHES     := {ground_truth_switches}
 
+NVCC_BIN        := nvcc
+NVCC            := $(shell which $(NVCC_BIN))
+CUDA_DIR        := $(dir $(NVCC))/..
+
+OBJ_DIR          = obj
+
 CC_REQUIRED     += $(FFLAGS)
 CC_REQUIRED     += -std=c++11
 CC_REQUIRED     += -I.
@@ -43,16 +49,83 @@ endif
 
 DEV_LDFLAGS     +=
 
-DEPFLAGS        += -MD -MF $*_dev.d
+DEPFLAGS        += -MMD -MF $(patsubst %.o,%.d,$@)
 
-TESTS           := $(wildcard tests/*.cpp)
-SOURCE          := $(wildcard *.cpp)
+TESTS            = $(wildcard tests/*.cpp)
+SOURCE           = $(wildcard *.cpp)
 SOURCE          += $(TESTS)
 
-DEV_OBJ          = $(SOURCE:%.cpp=%_dev.o)
-DEV_DEPS         = $(SOURCE:%.cpp=%_dev.d)
-GT_OBJ           = $(SOURCE:%.cpp=%_gt.o)
-GT_DEPS          = $(SOURCE:%.cpp=%_gt.d)
+VPATH            = $(dir $(SOURCE))
+
+CUSOURCE        += $(TESTS)
+CUSOURCE        += $(wildcard *.cpp)
+# TODO: use DEV_CUOBJ.  It is not yet used
+DEV_CUOBJ       += $(addprefix $(OBJ_DIR)/,$(notdir $(CUSOURCE:%.cpp=%_cu_dev.o)))
+
+VPATH           += $(dir $(CUSOURCE))
+
+HAS_CUDA        := $(shell command -v $(NVCC_BIN) 2> /dev/null)
+ifdef HAS_CUDA
+NVCC_CFLAGS     += --std=c++11
+NVCC_CFLAGS     += -ccbin=g++
+NVCC_CFLAGS     += $(DEVCAP)
+NVCC_CFLAGS     += -I .
+NVCC_CFLAGS     += -x cu
+NVCC_CFLAGS     += -dc
+NVCC_CFLAGS     += -D__CUDA__
+NVCC_CFLAGS     += -I$(FLIT_INC_DIR)
+NVCC_CFLAGS     += -I$(CUDA_DIR)/samples/common/inc
+endif # end of ifdef HAS_CUDA
+
+ifeq ($(UNAME_S),Darwin) # If we are on a Mac OSX system
+  NVCC_LINK     += -Llib -lflit
+else  # not on Mac OSX
+  NVCC_LINK     += -L$(FLIT_LIB_DIR) -lflit
+  NVCC_LINK     += -Xcompiler \"-Wl,-rpath=$(realpath $(FLIT_LIB_DIR))\"
+endif # end of if on Mac OSX
+
+NVCC_LINK       += --std=c++11
+NVCC_LINK       += -ccbin=g++
+NVCC_LINK       += -L$(CUDA_DIR)/lib64
+
+# TODO: double check CUDA flags.  Really?  No optimization levels?
+
+DEV_NVCC_CC     +=
+DEV_NVCC_LD     +=
+
+.PHONY: help
+help:
+	@echo 'You can run the Makefile directly, but it is recommended to use'
+	@echo
+	@echo '  flit make'
+	@echo
+	@echo 'so that you can have functionality such as adding the results to a'
+	@echo 'database.'
+	@echo
+	@echo 'The following targets are available:'
+	@echo
+	@echo '  help        Show this help and exit (default target)'
+	@echo '  dev         Only run the devel compilation to test things out'
+	@echo '  devcuda     Only run the devel CUDA compilation to test CUDA out'
+	@echo '  groundtruth Compile the ground-truth version and get its output'
+	@echo '  gt          Same as groundtruth'
+	@echo '  run         Run all combinations of compilation, results in results/'
+	@echo '  clean       Clean intermediate files'
+	@echo '  veryclean   Runs clean + removes targets and results'
+	@echo '  distclean   Same as veryclean'
+	@echo
+
+# Note: having this include at the end breaks the makefile for when users add
+# to SOURCE from custom.mk
+-include custom.mk
+
+VPATH           := $(sort $(VPATH))
+
+
+DEV_OBJ          = $(addprefix $(OBJ_DIR)/,$(notdir $(SOURCE:%.cpp=%_dev.o)))
+DEV_DEPS         = $(DEV_OBJ:%.o=%.d)
+GT_OBJ           = $(addprefix $(OBJ_DIR)/,$(notdir $(SOURCE:%.cpp=%_gt.o)))
+GT_DEPS          = $(GT_OBJ:%.o=%.d)
 
 CLANG           := clang++
 INTEL           := icpc
@@ -211,7 +284,11 @@ TARGETS         := $(foreach c, $(COMPILERS), \
                       ) \
                     )
 BIN             := $(TARGETS:%_out.csv=%)
-OBJ              = $(foreach b,$(BIN),$(SOURCE:%.cpp=%_$(notdir $b).o))
+DEPS             = $(foreach b,$(BIN), \
+                     $(addprefix $(OBJ_DIR)/,$(notdir $(SOURCE:%.cpp=%_$(notdir $b).d))) \
+                    )
+OBJ_CLEAN        = $(addprefix $(OBJ_DIR)/,$(notdir $(SOURCE:%.cpp=%_*.o)))
+DEP_CLEAN       += $(OBJ_CLEAN:%.o=%.d)
 
 ##################################################
 #
@@ -219,45 +296,7 @@ OBJ              = $(foreach b,$(BIN),$(SOURCE:%.cpp=%_$(notdir $b).o))
 #
 ##################################################
 
-# TODO: double check CUDA flags.  Really?  No optimization levels?
-
-NVCC_BIN        := nvcc
-NVCC            := $(shell which $(NVCC_BIN))
-CUDA_DIR        := $(dir $(NVCC))/..
 
-HAS_CUDA        := $(shell command -v $(NVCC_BIN) 2> /dev/null)
-ifdef HAS_CUDA
-
-NVCC_CFLAGS     += --std=c++11
-NVCC_CFLAGS     += -ccbin=g++
-NVCC_CFLAGS     += $(DEVCAP)
-NVCC_CFLAGS     += -I .
-NVCC_CFLAGS     += -x cu
-NVCC_CFLAGS     += -dc
-NVCC_CFLAGS     += -D__CUDA__
-NVCC_CFLAGS     += -I$(FLIT_INC_DIR)
-NVCC_CFLAGS     += -I$(CUDA_DIR)/samples/common/inc
-
-ifeq ($(UNAME_S),Darwin) # If we are on a Mac OSX system
-  NVCC_LINK     += -Llib -lflit
-else
-  NVCC_LINK     += -L$(FLIT_LIB_DIR) -lflit
-  NVCC_LINK     += -Xcompiler \"-Wl,-rpath=$(realpath $(FLIT_LIB_DIR))\"
-endif
-
-LD_REQUIRED     += -lm
-LD_REQUIRED     += -lstdc++
-NVCC_LINK       += --std=c++11
-NVCC_LINK       += -ccbin=g++
-NVCC_LINK       += -L$(CUDA_DIR)/lib64
-
-DEV_NVCC_CC     +=
-DEV_NVCC_LD     +=
-
-CUSOURCE        += $(TESTS)
-CUSOURCE        += $(wildcard *.cpp)
-# TODO: use DEV_CUOBJ.  It is not yet used
-DEV_CUOBJ       += $(CUSOURCE:%.cpp=%_cu_dev.o)
 
 # These are the fp affecting switches for CUDA (7.5).
 # We will naively apply these (though the docs say
@@ -265,6 +304,8 @@ DEV_CUOBJ       += $(CUSOURCE:%.cpp=%_cu_dev.o)
 # --ftz=true --prec-div=false --prec-sqrt=false
 # --fmad=true.
 
+ifdef HAS_CUDA
+
 FASTMC          := --use_fast_math
 FMADFC          := --fmad=false
 FMADTC          := --fmad=true
@@ -289,32 +330,12 @@ CUSWITCHES      += PRECSTC
 CUTARGETS       := $(foreach s, $(CUSWITCHES), \
                      $(RESULTS_DIR)/NVCC_$(HOSTNAME)_$(strip $(s))_out.csv)
 CUBIN           := $(CUTARGETS:%_out.csv=%)
-CUOBJ           := $(foreach b,$(CUBIN),$(CUSOURCE:%.cpp=%_$(notdir $b).o))
+CUDEPS           = $(foreach b,$(CUBIN), \
+                     $(addprefix $(OBJ_DIR)/, \
+                       $(notdir $(CUSOURCE:%.cpp=%_$(notdir $b).d))))
 
 endif # ifdef HAS_CUDA
 
-.PHONY: help
-help:
-	@echo 'You can run the Makefile directly, but it is recommended to use'
-	@echo
-	@echo '  flit make'
-	@echo
-	@echo 'so that you can have functionality such as adding the results to a'
-	@echo 'database.'
-	@echo
-	@echo 'The following targets are available:'
-	@echo
-	@echo '  help        Show this help and exit (default target)'
-	@echo '  dev         Only run the devel compilation to test things out'
-	@echo '  devcuda     Only run the devel CUDA compilation to test CUDA out'
-	@echo '  groundtruth Compile the ground-truth version and get its output'
-	@echo '  gt          Same as groundtruth'
-	@echo '  run         Run all combinations of compilation, results in results/'
-	@echo '  clean       Clean intermediate files'
-	@echo '  veryclean   Runs clean + removes targets and results'
-	@echo '  distclean   Same as veryclean'
-	@echo
-
 .PHONY: dev devcuda gt groundtruth run
 dev: $(DEV_TARGET)
 devcuda: $(DEV_CUTARGET)
@@ -325,13 +346,9 @@ run: $(TARGETS) $(CUTARGETS)
 
 .PHONY: clean
 clean:
-	rm -f $(DEV_OBJ)
-	rm -f $(DEV_DEPS)
-	rm -f $(DEV_CUOBJ)
-	rm -f $(OBJ)
-	rm -f $(CUOBJ)
-	rm -f $(GT_OBJ)
-	rm -f $(GT_DEPS)
+	rm -f $(OBJ_CLEAN)
+	rm -f $(DEP_CLEAN)
+	-rmdir $(OBJ_DIR)
 
 .PHONY: veryclean distclean
 veryclean: distclean
@@ -349,9 +366,6 @@ distclean: clean
 	rm -f $(addsuffix *.dat,$(GT_OUT))
 	-rmdir $(RESULTS_DIR)
 
-.PRECIOUS: %.d
--include $(SOURCE:%.cpp=%.d) $(DEV_DEPS) $(GT_DEPS)
-
 Makefile: flit-config.toml
 Makefile: $(FLIT_DATA_DIR)/Makefile.in
 Makefile: $(FLIT_SCRIPT_DIR)/flitconfig.py
@@ -359,6 +373,7 @@ Makefile: $(FLIT_SCRIPT_DIR)/flitutil.py
 Makefile: $(FLIT_SCRIPT_DIR)/flit_update.py
 	$(FLIT_SCRIPT_DIR)/flit.py update
 
+
 # We have a different solution if we are on a mac
 ifeq ($(UNAME_S),Darwin)
 lib/libflit.so: $(FLIT_LIB_DIR)/libflit.so
@@ -381,12 +396,15 @@ endif # ifeq ($(UNAME_S),Darwin): meaning, we are on a mac
 # Now for the compilation rules:
 #
 
+$(OBJ_DIR):
+	mkdir -p $(OBJ_DIR)
+
 # Dev compilation rules first (easier to understand)
-$(DEV_TARGET): $(DEV_OBJ) Makefile
+$(DEV_TARGET): $(DEV_OBJ) Makefile custom.mk
 	$(DEV_CC) $(CC_REQUIRED) $(DEV_CFLAGS) \
 	  -o $@ $(DEV_OBJ) $(LD_REQUIRED) $(DEV_LDFLAGS)
 
-%_dev.o: %.cpp Makefile
+$(OBJ_DIR)/%_dev.o: %.cpp Makefile custom.mk | $(OBJ_DIR)
 	$(DEV_CC) $(DEV_OPTL) $(DEV_SWITCHES) $(CC_REQUIRED) $(DEV_CFLAGS) $(DEPFLAGS) -c $< -o $@ \
 	  -DFLIT_HOST='"$(HOSTNAME)"'         \
 	  -DFLIT_COMPILER='"$(DEV_CC)"'       \
@@ -395,10 +413,10 @@ $(DEV_TARGET): $(DEV_OBJ) Makefile
 	  -DFLIT_FILENAME='"$(notdir $(DEV_TARGET))"'
 
 ifdef HAS_CUDA
-$(DEV_CUTARGET): $(DEV_CUOBJ) Makefile
+$(DEV_CUTARGET): $(DEV_CUOBJ) Makefile custom.mk
 	$(NVCC) $(NVCC_LINK) $(DEV_NVCC_LD) $(DEV_CUOBJ) -o $(DEV_CUTARGET)
 
-%_cu_dev.o: %.cpp
+$(OBJ_DIR)/%_cu_dev.o: %.cpp Makefile custom.mk | $(OBJ_DIR)
 	$(NVCC) -c $(NVCC_CFLAGS) $(DEV_NVCC_CC) $< -o $@
 endif # ifdef HAS_CUDA
 
@@ -406,10 +424,10 @@ endif # ifdef HAS_CUDA
 $(GT_OUT): $(GT_TARGET)
 	./$(GT_TARGET) --output $(GT_OUT)
 
-$(GT_TARGET): $(GT_OBJ) Makefile
+$(GT_TARGET): $(GT_OBJ) Makefile custom.mk
 	$(GT_CC) $(CC_REQUIRED) -o $@ $(GT_OBJ) $(LD_REQUIRED)
 
-%_gt.o: %.cpp Makefile
+$(OBJ_DIR)/%_gt.o: %.cpp Makefile custom.mk | $(OBJ_DIR)
 	$(GT_CC) $(GT_OPTL) $(GT_SWITCHES) $(CC_REQUIRED) $(DEPFLAGS) -c $< -o $@ \
 	  -DFLIT_HOST='"$(HOSTNAME)"'         \
 	  -DFLIT_COMPILER='"$(GT_CC)"'       \
@@ -436,21 +454,25 @@ PERCENT         := %
 # @param $3: variable name containing the optimization level (e.g. O2)
 define TARGETS_RULE
 # run test and collect results
-$$(RESULTS_DIR)/$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3)_out.csv: $$(RESULTS_DIR)/$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3) $$(GT_OUT)
+$$(RESULTS_DIR)/$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3)_out.csv: \
+  $$(RESULTS_DIR)/$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3) $$(GT_OUT)
 	-./$$< --output $$@ --ground-truth $$(GT_OUT)
 
 # link
-$$(RESULTS_DIR)/$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3) : $$(SOURCE:$(PERCENT).cpp=$(PERCENT)_$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3).o)
+$$(RESULTS_DIR)/$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3) : \
+  $$(addprefix $$(OBJ_DIR)/, \
+     $$(notdir $$(SOURCE:$(PERCENT).cpp=$(PERCENT)_$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3).o)))
 	mkdir -p $$(RESULTS_DIR)
 	-$$($(strip $2)) $$($(strip $1)) $$($(strip $3)) $$($(strip $2)_REQUIRED) \
 	$$(CC_REQUIRED) $$^ -o $$@ $$(LD_REQUIRED)
-	rm -f $$(SOURCE:.cpp=_$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3).o)
 
 # TODO: set FLIT_COMPILER to the compiler name, not the executable used
 # compile
-%_$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3).o : %.cpp
+$$(OBJ_DIR)/%_$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3).o: \
+  %.cpp Makefile custom.mk | $$(OBJ_DIR)
 	-$$($(strip $2)) -c $$($(strip $1)) $$($(strip $3)) $$(CC_REQUIRED) \
 	   $$($(strip $2)_REQUIRED) $$< -o $$@                           \
+	   $$(DEPFLAGS)                                                  \
 	   -DFLIT_HOST='"$$(HOSTNAME)"'                                  \
 	   -DFLIT_COMPILER='"$$($(strip $2))"'                           \
 	   -DFLIT_OPTL='"$$($(strip $3))"'                               \
@@ -476,17 +498,19 @@ $(foreach c, $(COMPILERS),                \
 #            (e.g. UNSOPTS for --funsafe-math-optimizations)
 define CUTARGETS_RULE
 #run test
-NVCC_$$(HOSTNAME)_$(strip $1)_out.csv : NVCC_$$(HOSTNAME)_$(strip $1) $$(GT_OUT)
+NVCC_$$(HOSTNAME)_$(strip $1)_out.csv: NVCC_$$(HOSTNAME)_$(strip $1) $$(GT_OUT)
 	./$$< --output $$@ --ground-truth $$(GT_OUT)
 
 #link test
-NVCC_$$(HOSTNAME)_$(strip $1) : $$(CUSOURCE:$(PERCENT).cpp=$(PERCENT)_NVCC_$$(HOSTNAME)_$(strip $1).o)
+NVCC_$$(HOSTNAME)_$(strip $1): \
+  $$(addprefix $$(OBJ_DIR)/, \
+     $$(notdir $$(CUSOURCE:$(PERCENT).cpp=$(PERCENT)_NVCC_$$(HOSTNAME)_$(strip $1).o)))
 	$$(NVCC) $$($(strip $1)) $$(NVCC_LINK) $$^ -o $$@
-	rm -f $$(CUSOURCE:$(PERCENT).cpp=$(PERCENT)_NVCC_$$(HOSTNAME)_$(strip $1).o)
 
 #compile test
-%_NVCC_$$(HOSTNAME)_$(strip $1).o : %.cpp
+$(OBJ_DIR)/%_NVCC_$$(HOSTNAME)_$(strip $1).o : %.cpp Makefile custom.mk | $(OBJ_DIR)
 	-$$(NVCC) -c $$($(strip $1)) $$(NVCC_CFLAGS) $$< -o $$@          \
+	   $$(DEPFLAGS)                                                  \
 	   -DFLIT_HOST='"$$(HOSTNAME)"'                                  \
 	   -DFLIT_COMPILER='"$$(NVCC)"'                                  \
 	   -DFLIT_OPTL='"$$($(strip $3))"'                               \
@@ -498,4 +522,6 @@ endef
 # define individual rules for all elements of $(CUTARGETS)
 $(foreach s, $(CUSWITCHES), $(eval $(call CUTARGETS_RULE, $s)))
 
--include custom.mk
+.PRECIOUS: %.d
+-include $(DEPS) $(CUDEPS) $(DEV_DEPS) $(GT_DEPS)
+
diff --git a/data/custom.mk b/data/custom.mk
index 81e208a5..8db5b81a 100644
--- a/data/custom.mk
+++ b/data/custom.mk
@@ -29,7 +29,7 @@ DEV_CFLAGS     +=
 DEV_LDFLAGS    +=
 
 # required compiler flags for CUDA
-NVCC_FLAGS     +=
+NVCC_CFLAGS    +=
 
 # required link flags for CUDA
 NVCC_LINK      +=

From f1db49b3d12018b9394a28ba3cab7f842e504036 Mon Sep 17 00:00:00 2001
From: Michael Bentley <bentley8@llnl.gov>
Date: Thu, 6 Jul 2017 10:34:15 -0700
Subject: [PATCH 3/5] Makefile.in: add runbuild target to build without
 executing

---
 data/Makefile.in | 46 +++++++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/data/Makefile.in b/data/Makefile.in
index 0c73daa5..416e96ea 100644
--- a/data/Makefile.in
+++ b/data/Makefile.in
@@ -109,6 +109,7 @@ help:
 	@echo '  devcuda     Only run the devel CUDA compilation to test CUDA out'
 	@echo '  groundtruth Compile the ground-truth version and get its output'
 	@echo '  gt          Same as groundtruth'
+	@echo '  runbuild    Build all executables needed for the run target'
 	@echo '  run         Run all combinations of compilation, results in results/'
 	@echo '  clean       Clean intermediate files'
 	@echo '  veryclean   Runs clean + removes targets and results'
@@ -276,15 +277,15 @@ SWITCHES_INTEL  += SINGLEPRECCONST
 SWITCHES_INTEL  += SSE
 SWITCHES_INTEL  += USEFASTM
 
-TARGETS         := $(foreach c, $(COMPILERS), \
+TARGET_OUTS     := $(foreach c, $(COMPILERS), \
                      $(foreach s, $(SWITCHES_$(strip $c)), \
                        $(foreach o, $(OPCODES), \
                          $(RESULTS_DIR)/$c_$(HOSTNAME)_$(strip $s)_$(strip $o)_out.csv \
                         ) \
                       ) \
                     )
-BIN             := $(TARGETS:%_out.csv=%)
-DEPS             = $(foreach b,$(BIN), \
+TARGETS         := $(TARGET_OUTS:%_out.csv=%)
+DEPS             = $(foreach b,$(TARGETS), \
                      $(addprefix $(OBJ_DIR)/,$(notdir $(SOURCE:%.cpp=%_$(notdir $b).d))) \
                     )
 OBJ_CLEAN        = $(addprefix $(OBJ_DIR)/,$(notdir $(SOURCE:%.cpp=%_*.o)))
@@ -327,27 +328,30 @@ CUSWITCHES      += PRECDTC
 CUSWITCHES      += PRECSFC
 CUSWITCHES      += PRECSTC
 
-CUTARGETS       := $(foreach s, $(CUSWITCHES), \
+CUTARGET_OUTS   := $(foreach s, $(CUSWITCHES), \
                      $(RESULTS_DIR)/NVCC_$(HOSTNAME)_$(strip $(s))_out.csv)
-CUBIN           := $(CUTARGETS:%_out.csv=%)
-CUDEPS           = $(foreach b,$(CUBIN), \
+CUTARGETS       := $(CUTARGET_OUTS:%_out.csv=%)
+CUDEPS           = $(foreach b,$(CUTARGETS), \
                      $(addprefix $(OBJ_DIR)/, \
                        $(notdir $(CUSOURCE:%.cpp=%_$(notdir $b).d))))
 
 endif # ifdef HAS_CUDA
 
-.PHONY: dev devcuda gt groundtruth run
+.PHONY: dev devcuda gt groundtruth run runbuild
 dev: $(DEV_TARGET)
 devcuda: $(DEV_CUTARGET)
 gt: $(GT_TARGET) $(GT_OUT)
 groundtruth: $(GT_TARGET) $(GT_OUT)
 
-run: $(TARGETS) $(CUTARGETS)
+run: $(TARGET_OUTS) $(CUTARGET_OUTS) runbuild
+runbuild: $(TARGETS) $(CUTARGETS)
 
 .PHONY: clean
 clean:
-	rm -f $(OBJ_CLEAN)
-	rm -f $(DEP_CLEAN)
+	# Here we do it this way because we were running into the error of too many
+	# arguments given to rm.
+	$(foreach obj,$(OBJ_CLEAN),rm -f $(obj);)
+	$(foreach obj,$(DEP_CLEAN),rm -f $(obj);)
 	-rmdir $(OBJ_DIR)
 
 .PHONY: veryclean distclean
@@ -355,12 +359,12 @@ veryclean: distclean
 distclean: clean
 	rm -f $(DEV_TARGET)
 	rm -f $(DEV_CUTARGET)
+	rm -f $(TARGET_OUTS)
+	rm -f $(addsuffix *.dat,$(TARGET_OUTS))
 	rm -f $(TARGETS)
-	rm -f $(addsuffix *.dat,$(TARGETS))
-	rm -f $(BIN)
+	rm -f $(CUTARGET_OUTS)
+	rm -f $(addsuffix *.dat,$(CUTARGET_OUTS))
 	rm -f $(CUTARGETS)
-	rm -f $(addsuffix *.dat,$(CUTARGETS))
-	rm -f $(CUBIN)
 	rm -f $(GT_TARGET)
 	rm -f $(GT_OUT)
 	rm -f $(addsuffix *.dat,$(GT_OUT))
@@ -387,8 +391,8 @@ cleanlibflit:
 
 $(DEV_TARGET): lib/libflit.so
 $(GT_TARGET): lib/libflit.so
-$(BIN): lib/libflit.so
-$(CUBIN): lib/libflit.so
+$(TARGETS): lib/libflit.so
+$(CUTARGETS): lib/libflit.so
 endif # ifeq ($(UNAME_S),Darwin): meaning, we are on a mac
 
 
@@ -429,7 +433,7 @@ $(GT_TARGET): $(GT_OBJ) Makefile custom.mk
 
 $(OBJ_DIR)/%_gt.o: %.cpp Makefile custom.mk | $(OBJ_DIR)
 	$(GT_CC) $(GT_OPTL) $(GT_SWITCHES) $(CC_REQUIRED) $(DEPFLAGS) -c $< -o $@ \
-	  -DFLIT_HOST='"$(HOSTNAME)"'         \
+	  -DFLIT_HOST='"$(HOSTNAME)"'        \
 	  -DFLIT_COMPILER='"$(GT_CC)"'       \
 	  -DFLIT_OPTL='"$(GT_OPTL)"'         \
 	  -DFLIT_SWITCHES='"$(GT_SWITCHES)"' \
@@ -446,7 +450,7 @@ PERCENT         := %
 #   1. compiling
 #   2. running
 #   3. replacing placeholders in output
-# for a single target from TARGETS.
+# for a single target from TARGET_OUTS.
 #
 # @param $1: variable name containing compiler switch(es) for this compilation
 #            (e.g. UNSOPTS for --funsafe-math-optimizations)
@@ -481,7 +485,7 @@ $$(OBJ_DIR)/%_$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3).o: \
 endef
 # end of define TARGETS_RULE
 
-# Define individual rules for all elements of $(TARGETS)
+# Define individual rules for all elements of $(TARGET_OUTS)
 $(foreach c, $(COMPILERS),                \
   $(foreach s, $(SWITCHES_$(strip $c)),   \
     $(foreach o, $(OPCODES),              \
@@ -492,7 +496,7 @@ $(foreach c, $(COMPILERS),                \
 #   1. compiling
 #   2. running
 #   3. replacing placeholders in output
-# for a single target from CUTARGETS.
+# for a single target from CUTARGET_OUTS.
 #
 # @param $1: variable name containing compiler switch(es) for this compilation
 #            (e.g. UNSOPTS for --funsafe-math-optimizations)
@@ -519,7 +523,7 @@ $(OBJ_DIR)/%_NVCC_$$(HOSTNAME)_$(strip $1).o : %.cpp Makefile custom.mk | $(OBJ_
 endef
 # end of def CUTARGETS_RULE
 
-# define individual rules for all elements of $(CUTARGETS)
+# define individual rules for all elements of $(CUTARGET_OUTS)
 $(foreach s, $(CUSWITCHES), $(eval $(call CUTARGETS_RULE, $s)))
 
 .PRECIOUS: %.d

From 9ecf25f680b255732d303a4d370ccdaff9f94291 Mon Sep 17 00:00:00 2001
From: Michael Bentley <bentley8@llnl.gov>
Date: Thu, 6 Jul 2017 14:32:33 -0700
Subject: [PATCH 4/5] flit import: change default to create a new run

Also:
- change the command-line options for flit import to --append, --label,
  and --run.  This is simpler and makes more sense, and the option
  documentation is now more condensed too.
- change the column 'notes' to 'label' in the runs table of the database
---
 data/db/InstallFlitDB.sh            |  8 +--
 data/db/tables-psql.sql             |  8 +--
 data/db/tables-sqlite.sql           |  2 +-
 documentation/database-structure.md |  6 +-
 scripts/flitcli/flit_import.py      | 93 +++++++++++++----------------
 scripts/run_all.py                  | 10 ++--
 6 files changed, 59 insertions(+), 68 deletions(-)

diff --git a/data/db/InstallFlitDB.sh b/data/db/InstallFlitDB.sh
index 45dfb405..8f8d8a56 100755
--- a/data/db/InstallFlitDB.sh
+++ b/data/db/InstallFlitDB.sh
@@ -1014,14 +1014,14 @@ $$;
 -- Name: dofullflitimport(text, text); Type: FUNCTION; Schema: public; Owner: -
 --
 
-CREATE FUNCTION dofullflitimport(path text, notes text) RETURNS integer[]
+CREATE FUNCTION dofullflitimport(path text, label text) RETURNS integer[]
     LANGUAGE plpython3u
     AS $$
 import datetime
 
-query = ("INSERT INTO runs (rdate, notes) "
+query = ("INSERT INTO runs (rdate, label) "
          "VALUES ('" + str(datetime.datetime.now())  +
-         "','" + notes + "')")
+         "','" + label + "')")
 plpy.execute(query)
 query = ("SELECT MAX(index) from runs")
 res = plpy.execute(query)
@@ -1340,7 +1340,7 @@ CREATE TABLE opcodes (
 CREATE TABLE runs (
     index integer NOT NULL,
     rdate timestamp without time zone,
-    notes text
+    label text
 );
 
 
diff --git a/data/db/tables-psql.sql b/data/db/tables-psql.sql
index b45c2c07..576de287 100644
--- a/data/db/tables-psql.sql
+++ b/data/db/tables-psql.sql
@@ -355,14 +355,14 @@ $$;
 -- Name: dofullflitimport(text, text); Type: FUNCTION; Schema: public; Owner: -
 --
 
-CREATE FUNCTION dofullflitimport(path text, notes text) RETURNS integer[]
+CREATE FUNCTION dofullflitimport(path text, label text) RETURNS integer[]
     LANGUAGE plpython3u
     AS $$
 import datetime
 
-query = ("INSERT INTO runs (rdate, notes) "
+query = ("INSERT INTO runs (rdate, label) "
          "VALUES ('" + str(datetime.datetime.now())  +
-         "','" + notes + "')")
+         "','" + label + "')")
 plpy.execute(query)
 query = ("SELECT MAX(index) from runs")
 res = plpy.execute(query)
@@ -681,7 +681,7 @@ CREATE TABLE opcodes (
 CREATE TABLE runs (
     index integer NOT NULL,
     rdate timestamp without time zone,
-    notes text
+    label text
 );
 
 
diff --git a/data/db/tables-sqlite.sql b/data/db/tables-sqlite.sql
index 753110b5..ccd70cb1 100644
--- a/data/db/tables-sqlite.sql
+++ b/data/db/tables-sqlite.sql
@@ -25,7 +25,7 @@ CREATE TABLE IF NOT EXISTS runs (
   rdate          timestamp,
 
   -- The message describing what this run is all about
-  notes          text
+  label          text
   );
 
 --
diff --git a/documentation/database-structure.md b/documentation/database-structure.md
index da31a609..6aa16341 100644
--- a/documentation/database-structure.md
+++ b/documentation/database-structure.md
@@ -30,8 +30,8 @@ CREATE TABLE runs (
   -- it and convert it to a sqlite3 basic type and back.
   rdate          timestamp,
 
-  -- The message describing what this run is all about
-  notes          text
+  -- The label for the run describing what it is about
+  label          text
   );
 CREATE TABLE tests (
   id             integer    primary key autoincrement     not null,
@@ -57,7 +57,7 @@ This output is as of this writing.  You can execute those same commands to see
 the exact schema used in your version of FLiT.
 
 The `runs` table only stores information about each executed full run, the id,
-datetime and user-specified label for the run (called `notes`).
+datetime and user-specified label for the run (called `label`).
 
 The `tests` table contains the actual test results.  Each row has a run number
 that matches the `id` field of the `runs` table, so you can do things like:
diff --git a/scripts/flitcli/flit_import.py b/scripts/flitcli/flit_import.py
index 09f2613d..5b4c3024 100644
--- a/scripts/flitcli/flit_import.py
+++ b/scripts/flitcli/flit_import.py
@@ -35,34 +35,30 @@ def main(arguments, prog=sys.argv[0]):
                             File(s) to import into the database.  These files
                             may be csv files or sqlite3 databases.
                             ''')
-    #parser.add_argument('-t', '--table', default='tests',
-    #                    help='''
-    #                        The database table used for import (default is tests)
-    #                        ''')
-    parser.add_argument('-r', '--run', type=int, default=-1,
+    parser.add_argument('-a', '--append', type=int, default=None, metavar='RUN_ID',
                         help='''
-                            The run number to import under.  If the run does
-                            not exist in the runs table, then a new entry will
-                            be created with an autogenerated message.  The
-                            default behavior is to use the latest run in the
-                            database (but one will be created if there is no
-                            runs).  If importing from an sqlite database, the
-                            run column of the tests table is ignored since
-                            there is no necessary correlation between run
-                            numbers of that database and this database.  So you
-                            would still want to use the --run option.  For the
-                            sqlite database case, the results imported will
-                            only be from the latest run in the importing
-                            database.
+                            Append the import to the specified run id.  The
+                            default behavior is to add a new run to include the
+                            results of the import.  You must specify a run id
+                            that already exists in the database.
                             ''')
-    parser.add_argument('--new-run', action='store_true',
+    parser.add_argument('-l', '--label', default='Imported using flit import',
                         help='''
-                            Specifies that this import should be under a new
-                            run number that will be autogenerated.  This option
-                            conflicts with the --run option, meaning if this
-                            argument is specified, then the --run argument will
-                            be ignored.  This option is also implied if the
-                            destination database has no runs in it.
+                            The label to attach to the run.  Only applicable
+                            when creating a new run.  This argument is ignored
+                            if --append is specified.  The default label is
+                            'Imported using flit import'.
+                            ''')
+    parser.add_argument('-r', '--run', type=int, default=None,
+                        help='''
+                            Only applicable to the importing of sqlite3
+                            database files.  This will apply to all sqlite3
+                            database files passed in.  Only this run id will be
+                            imported from the provided database.  The default
+                            behavior is to import the latest run.  You cannot
+                            specify more than one run to be imported, you must
+                            call this program multiple times, each one with
+                            --run specified to the next run you want to import.
                             ''')
     args = parser.parse_args(arguments)
 
@@ -77,47 +73,42 @@ def main(arguments, prog=sys.argv[0]):
             'Only sqlite database supported'
     db = util.sqlite_open(projconf['database']['filepath'])
 
-    # if the database has no runs, then turn on --new-run
-    run_ids = [x['id'] for x in db.execute('select id from runs')]
-    if len(run_ids) == 0:
-        args.new_run = True
-    print('run_ids: ', run_ids)
-
-    # Find the destination run
-    if not args.new_run:
-        assert args.run <= 0 or args.run in run_ids, \
-                'Specified run {0} is not in the runs table'.format(args.run)
-        if args.run not in run_ids:
-            args.run = sorted(run_ids)[-1]
-    else: # args.new_run
+    # create a new run and set the args.append run id
+    if args.append is None:
         # Create a new run to use in import
-        db.execute('insert into runs(rdate,notes) values (?,?)',
-                (datetime.datetime.now(), 'Imported using flit import'))
+        db.execute('insert into runs(rdate,label) values (?,?)',
+                (datetime.datetime.now(), args.label))
         db.commit()
-        args.run = db.execute('select id from runs order by id').fetchall()[-1]['id']
+        args.append = db.execute('select id from runs order by id').fetchall()[-1]['id']
+
+    # Make sure the run id exists.
+    run_ids = [x['id'] for x in db.execute('select id from runs')]
+    assert args.append in run_ids, \
+            'Specified append run id {0} is not in the runs ' \
+            'table'.format(args.append)
 
     for importee in args.importfile:
-        print(importee)
+        print('Importing', importee)
         if util.is_sqlite(importee):
-            # Try to treat the importfile like a sqlite database
             import_db = util.sqlite_open(importee)
             cur = import_db.cursor()
             cur.execute('select id from runs')
-            run_ids = sorted([x['id'] for x in cur])
-            if len(run_ids) == 0:
-                print('  nothing to import')
+            importee_run_ids = sorted([x['id'] for x in cur])
+            if len(importee_run_ids) == 0:
+                print('  no runs in database: nothing to import')
                 continue
-            latest_run = run_ids[-1]
+            latest_run = importee_run_ids[-1]
+            import_run = args.run if args.run is not None else latest_run
             cur.execute('select name,host,compiler,optl,switches,precision,'
                         'comparison,comparison_d,file,nanosec '
-                        'from tests where run = ?', (latest_run,))
-            rows = cur.fetchall()
+                        'from tests where run = ?', (import_run,))
+            rows = [dict(x) for x in cur]
         else:
             with open(importee, 'r') as csvin:
                 reader = csv.DictReader(csvin)
                 rows = [row for row in reader]
         if len(rows) == 0:
-            print('  nothing to import')
+            print('  zero rows: nothing to import')
             continue
         to_insert = []
         for row in rows:
@@ -126,7 +117,7 @@ def main(arguments, prog=sys.argv[0]):
                 row[key] = val if val != 'NULL' else None
             # Insert
             to_insert.append((
-                args.run,
+                args.append,
                 row['name'],
                 row['host'],
                 row['compiler'],
diff --git a/scripts/run_all.py b/scripts/run_all.py
index cd048639..fc02a6b6 100755
--- a/scripts/run_all.py
+++ b/scripts/run_all.py
@@ -23,7 +23,7 @@
 home_dir = os.path.dirname(os.path.realpath(__file__))
 
 #vars
-notes = ''
+label = ''
 DB_HOST_AUX = '/tmp/flitDbDir'
 DBINIT = 'prepDBHost.py'
 db_host = hostfile.DB_HOST
@@ -37,7 +37,7 @@
 pwds = {}
 
 def usage():
-    print('usage: ' + sys.argv[0] + ' "notes"')
+    print('usage: ' + sys.argv[0] + ' "label"')
     print('\tyou must populate ' + home_dir + '/hostfile.py with')
     print('\trun and db host info (see file for details)')
 
@@ -116,7 +116,7 @@ def getPasswords():
             )
 
 if len(sys.argv) == 2:
-    notes = sys.argv[1]
+    label = sys.argv[1]
 
 else:
     usage()
@@ -150,8 +150,8 @@ def getPasswords():
 # #get run# from db
 print(check_output(['sshpass', '-e', *SSHL,
                     db_host[0] + '@' + db_host[1],
-              'psql flit -t -c "insert into runs (rdate, notes) ' +
-              'values (\'' + str(datetime.now()) + '\', \'' + notes + '\')"'],
+              'psql flit -t -c "insert into runs (rdate, label) ' +
+              'values (\'' + str(datetime.now()) + '\', \'' + label + '\')"'],
                    env=new_env).decode("utf-8"))
 run_num = int(check_output(['sshpass', '-e', *SSHL,
                             db_host[0] + '@' + db_host[1],

From ec165640bb1c5913a4b3536cb94efdafbafe187a Mon Sep 17 00:00:00 2001
From: Michael Bentley <mikebentley15@gmail.com>
Date: Sat, 8 Jul 2017 14:17:17 -0700
Subject: [PATCH 5/5] compare in the ground truth executable

Previously, each of the many compiled test executables
ran the comparison itself.  The main problem with that
approach is that the way a test executable is compiled
may influence the behavior of its compare() method.

The ground truth executable is executed now at the
very end and all of the results from all tests are
passed in all at once.  The test result files are
updated with the values of the comparison (so they
are rewritten with that column filled in).
---
 data/Makefile.in |  23 ++++---
 src/flit.cpp     |  78 ++++++++++++++++------
 src/flit.h       | 169 ++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 206 insertions(+), 64 deletions(-)

diff --git a/data/Makefile.in b/data/Makefile.in
index 416e96ea..8a75a5fe 100644
--- a/data/Makefile.in
+++ b/data/Makefile.in
@@ -107,7 +107,7 @@ help:
 	@echo '  help        Show this help and exit (default target)'
 	@echo '  dev         Only run the devel compilation to test things out'
 	@echo '  devcuda     Only run the devel CUDA compilation to test CUDA out'
-	@echo '  groundtruth Compile the ground-truth version and get its output'
+	@echo '  groundtruth Compile the ground-truth version'
 	@echo '  gt          Same as groundtruth'
 	@echo '  runbuild    Build all executables needed for the run target'
 	@echo '  run         Run all combinations of compilation, results in results/'
@@ -340,11 +340,11 @@ endif # ifdef HAS_CUDA
 .PHONY: dev devcuda gt groundtruth run runbuild
 dev: $(DEV_TARGET)
 devcuda: $(DEV_CUTARGET)
-gt: $(GT_TARGET) $(GT_OUT)
-groundtruth: $(GT_TARGET) $(GT_OUT)
+gt: groundtruth
+groundtruth: $(GT_TARGET)
 
-run: $(TARGET_OUTS) $(CUTARGET_OUTS) runbuild
-runbuild: $(TARGETS) $(CUTARGETS)
+run: $(TARGET_OUTS) $(CUTARGET_OUTS) runbuild $(GT_OUT)
+runbuild: $(TARGETS) $(CUTARGETS) groundtruth
 
 .PHONY: clean
 clean:
@@ -425,8 +425,9 @@ $(OBJ_DIR)/%_cu_dev.o: %.cpp Makefile custom.mk | $(OBJ_DIR)
 endif # ifdef HAS_CUDA
 
 # Ground truth compilation rules
-$(GT_OUT): $(GT_TARGET)
-	./$(GT_TARGET) --output $(GT_OUT)
+$(GT_OUT): $(GT_TARGET) $(TARGET_OUTS) $(CUTARGET_OUTS)
+	./$(GT_TARGET) --output $(GT_OUT) \
+	  --compare-mode $(TARGET_OUTS) $(CUTARGET_OUTS)
 
 $(GT_TARGET): $(GT_OBJ) Makefile custom.mk
 	$(GT_CC) $(CC_REQUIRED) -o $@ $(GT_OBJ) $(LD_REQUIRED)
@@ -459,8 +460,8 @@ PERCENT         := %
 define TARGETS_RULE
 # run test and collect results
 $$(RESULTS_DIR)/$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3)_out.csv: \
-  $$(RESULTS_DIR)/$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3) $$(GT_OUT)
-	-./$$< --output $$@ --ground-truth $$(GT_OUT)
+  $$(RESULTS_DIR)/$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3)
+	-./$$< --output $$@
 
 # link
 $$(RESULTS_DIR)/$(strip $2)_$$(HOSTNAME)_$(strip $1)_$(strip $3) : \
@@ -502,8 +503,8 @@ $(foreach c, $(COMPILERS),                \
 #            (e.g. UNSOPTS for --funsafe-math-optimizations)
 define CUTARGETS_RULE
 #run test
-NVCC_$$(HOSTNAME)_$(strip $1)_out.csv: NVCC_$$(HOSTNAME)_$(strip $1) $$(GT_OUT)
-	./$$< --output $$@ --ground-truth $$(GT_OUT)
+NVCC_$$(HOSTNAME)_$(strip $1)_out.csv: NVCC_$$(HOSTNAME)_$(strip $1)
+	./$$< --output $$@
 
 #link test
 NVCC_$$(HOSTNAME)_$(strip $1): \
diff --git a/src/flit.cpp b/src/flit.cpp
index c223d04c..13edce6d 100644
--- a/src/flit.cpp
+++ b/src/flit.cpp
@@ -94,17 +94,22 @@ std::string FlitOptions::toString() {
   messanger
     << "Options:\n"
     << "  help:         " << boolToString(this->help) << "\n"
-    << "  listTests:    " << boolToString(this->listTests) << "\n"
     << "  verbose:      " << boolToString(this->verbose) << "\n"
     << "  timing:       " << boolToString(this->timing) << "\n"
     << "  timingLoops:  " << this->timingLoops << "\n"
-    << "  output:       " << this->output << "\n"
-    << "  groundTruth:  " << this->groundTruth << "\n"
+    << "  listTests:    " << boolToString(this->listTests) << "\n"
     << "  precision:    " << this->precision << "\n"
+    << "  output:       " << this->output << "\n"
+    << "  compareMode:  " << boolToString(this->compareMode) << "\n"
     << "  tests:\n";
   for (auto& test : this->tests) {
     messanger << "    " << test << "\n";
   }
+  messanger
+    << "  compareFiles:\n";
+  for (auto& filename : this->compareFiles) {
+    messanger << "    " << filename << "\n";
+  }
   return messanger.str();
 }
 
@@ -118,7 +123,7 @@ FlitOptions parseArguments(int argCount, char* argList[]) {
   std::vector<std::string> listTestsOpts     = { "-L", "--list-tests" };
   std::vector<std::string> precisionOpts     = { "-p", "--precision" };
   std::vector<std::string> outputOpts        = { "-o", "--output" };
-  std::vector<std::string> groundTruthOpts   = { "-g", "--ground-truth" };
+  std::vector<std::string> compareMode       = { "-c", "--compare-mode" };
   std::vector<std::string> allowedPrecisions = {
     "all", "float", "double", "long double"
   };
@@ -158,19 +163,25 @@ FlitOptions parseArguments(int argCount, char* argList[]) {
         throw ParseException(current + " requires an argument");
       }
       options.output = argList[++i];
-    } else if (isIn(groundTruthOpts, current)) {
-      if (i+1 == argCount) {
-        throw ParseException(current + " requires an argument");
-      }
-      options.groundTruth = argList[++i];
+    } else if (isIn(compareMode, current)) {
+      options.compareMode = true;
     } else {
       options.tests.push_back(current);
-      if (!isIn(allowedTests, current)) {
+      if (!options.compareMode && !isIn(allowedTests, current)) {
         throw ParseException("unknown test " + current);
       }
     }
   }
 
+  // names passed on the command line in compareMode are compareFiles not tests
+  if (options.compareMode) {
+    options.tests.swap(options.compareFiles);
+    options.tests.emplace_back("all");
+    if (options.compareFiles.size() == 0) {
+      throw ParseException("You must pass in some test results in compare mode");
+    }
+  }
+
   if (options.tests.size() == 0 || isIn(options.tests, std::string("all"))) {
     options.tests = getKeys(getTests());
   }
@@ -183,7 +194,8 @@ std::string usage(std::string progName) {
   std::ostringstream messanger;
   messanger
     << "Usage:\n"
-    << "  " << progName << " [options] [[test] ...]\n"
+    << "  " << progName << " [options] [<test> ...]\n"
+    << "  " << progName << " --compare-mode <csvfile> [<csvfile> ...]\n"
     << "\n"
        "Description:\n"
        "  Runs the FLiT tests and outputs the results to the console in CSV\n"
@@ -216,17 +228,19 @@ std::string usage(std::string progName) {
        "                  standard output will still go to the terminal.\n"
        "                  The default behavior is to output to stdout.\n"
        "\n"
-       "  -g INFILE, --ground-truth INFILE\n"
-       "                  Use the following results file (usually generated\n"
-       "                  using the --output option with the ground-truth\n"
-       "                  compiled executable).  This option allows the\n"
-       "                  creation of data for the comparison column in the\n"
-       "                  results.  The test's compare() function is used.\n"
+       "  -c, --compare-mode\n"
+       "                  This option only makes sense to use on the ground\n"
+       "                  truth executable.  You will no longer be able to\n"
+       "                  pass in particular tests to execute because the\n"
+       "                  arguments are interpreted as the results files to\n"
+       "                  use in the comparison.\n"
        "\n"
-       "                  Note: for tests outputting string data, the path\n"
-       "                  may be a relative path from where you executed the\n"
-       "                  ground-truth executable, in which case you will\n"
-       "                  want to run this test from that same directory.\n"
+       "                  Note: for tests returning a string, the results\n"
+       "                  file will contain a relative path to the file that\n"
+       "                  actually contains the string return value.  So you\n"
+       "                  will want to make sure to call this option in the\n"
+       "                  same directory used when executing the test\n"
+       "                  executable.\n"
        "\n"
        "  -p PRECISION, --precision PRECISION\n"
        "                  Which precision to run.  The choices are 'float',\n"
@@ -268,6 +282,28 @@ std::vector<TestResult> parseResults(std::istream &in) {
   return results;
 }
 
+/** Read the per-file metadata columns from the first row of a results CSV.
+ *  Returns an empty map when no row can be read from the stream. */
+std::unordered_map<std::string, std::string> parseMetadata(std::istream &in) {
+  std::unordered_map<std::string, std::string> metadata;
+
+  // columns that outputResults() writes identically on every row of a file
+  const std::string metadataKeys[] = {
+    "host", "compiler", "optl", "switches", "file"
+  };
+
+  Csv csv(in);
+  CsvRow row;
+  if (csv >> row) {
+    // bind by reference so the key strings are not copied each iteration
+    for (const auto &key : metadataKeys) {
+      metadata.emplace(key, row[key]);
+    }
+  }
+
+  return metadata;
+}
+
 std::string removeIdxFromName(const std::string &name) {
   std::string pattern("_idx"); // followed by 1 or more digits
   auto it = std::find_end(name.begin(), name.end(),
diff --git a/src/flit.h b/src/flit.h
index 09aedf98..eb1a5392 100644
--- a/src/flit.h
+++ b/src/flit.h
@@ -20,6 +20,8 @@
 #include <sstream>
 #include <type_traits>
 #include <typeinfo>
+#include <unordered_map>
+#include <utility>
 
 #include <cstring>
 
@@ -62,7 +64,8 @@ struct FlitOptions {
   std::string output = "";        // output file for results.  default stdout
   bool timing = true;     // should we run timing?
   int timingLoops = 1;    // < 1 means to auto-determine the timing loops
-  std::string groundTruth = "";   // input for ground-truth comparison
+  bool compareMode = false; // compare results after running the test
+  std::vector<std::string> compareFiles; // files for compareMode
 
   /** Give a string representation of this struct for printing purposes */
   std::string toString();
@@ -73,6 +76,19 @@ struct FlitOptions {
   }
 };
 
+template<typename A, typename B>
+struct pair_hash {
+  // Mixes the hashes of both members of the pair, following the scheme
+  // python uses when hashing a tuple
+  size_t operator()(const std::pair<A, B> &thepair) const {
+    const size_t multiplier = 1000003;
+    size_t combined = 0x345678;
+    combined = (multiplier * combined) ^ std::hash<A>()(thepair.first);
+    combined = (multiplier * combined) ^ std::hash<B>()(thepair.second);
+    return combined;
+  }
+};
+
 /** Parse arguments */
 FlitOptions parseArguments(int argCount, char* argList[]);
 
@@ -85,11 +101,74 @@ std::string readFile(const std::string &filename);
 /** Parse the results file into a vector of results */
 std::vector<TestResult> parseResults(std::istream &in);
 
+/** Parse the result file to get metadata from the first row */
+std::unordered_map<std::string, std::string> parseMetadata(std::istream &in);
+
 /** Test names sometimes are postfixed with "_idx" + <num>.  Remove that postfix */
 std::string removeIdxFromName(const std::string &name);
 
-inline void outputResults (const std::vector<TestResult>& results,
-    std::ostream& out)
+class TestResultMap {
+public:
+  using key_type = std::pair<std::string, std::string>;
+
+  /** Parse the named results file and index every row it holds */
+  void loadfile(const std::string &filename) {
+    std::ifstream input(filename);
+    this->extend(parseResults(input), filename);
+  }
+
+  /** Look up all loaded results for one (test name, precision) pair */
+  std::vector<TestResult*> operator[](const key_type &key) const {
+    std::vector<TestResult*> matches;
+    const auto bounds = m_testmap.equal_range(key);
+    for (auto entry = bounds.first; entry != bounds.second; ++entry) {
+      matches.push_back(entry->second);
+    }
+    return matches;
+  }
+
+  /** Look up all results that were loaded from the named file */
+  std::vector<TestResult*> fileresults(const std::string &filename) {
+    std::vector<TestResult*> matches;
+    const auto bounds = m_filemap.equal_range(filename);
+    for (auto entry = bounds.first; entry != bounds.second; ++entry) {
+      matches.push_back(&(entry->second));
+    }
+    return matches;
+  }
+
+private:
+  /** Store a copy of one result and index it by name and precision */
+  void append(const TestResult &result, const std::string &filename) {
+    TestResult *stored = &(m_filemap.emplace(filename, result)->second);
+    m_testmap.emplace(key_type{result.name(), result.precision()}, stored);
+  }
+
+  /** Store each result of a parsed file, in order */
+  void extend(const std::vector<TestResult> &results,
+              const std::string &filename)
+  {
+    for (const TestResult &entry : results) {
+      this->append(entry, filename);
+    }
+  }
+
+private:
+  // (testname, precision) -> TestResult* (pointing into m_filemap)
+  std::unordered_multimap<key_type, TestResult*,
+                          pair_hash<std::string, std::string>> m_testmap;
+  // filename -> TestResult
+  std::unordered_multimap<std::string, TestResult> m_filemap;
+};
+
+inline void outputResults (
+    const std::vector<TestResult>& results,
+    std::ostream& out,
+    std::string hostname = FLIT_HOST,
+    std::string compiler = FLIT_COMPILER,
+    std::string optimization_level = FLIT_OPTL,
+    std::string switches = FLIT_SWITCHES,
+    std::string executableFilename = FLIT_FILENAME)
 {
   // Output the column headers
   out << "name,"
@@ -109,10 +188,10 @@ inline void outputResults (const std::vector<TestResult>& results,
   for(const auto& result: results){
     out
       << result.name() << ","                        // test case name
-      << FLIT_HOST << ","                            // hostname
-      << FLIT_COMPILER << ","                        // compiler
-      << FLIT_OPTL << ","                            // optimization level
-      << FLIT_SWITCHES << ","                        // compiler flags
+      << hostname << ","                             // hostname
+      << compiler << ","                             // compiler
+      << optimization_level << ","                   // optimization level
+      << switches << ","                             // compiler flags
       << result.precision() << ","                   // precision
       ;
 
@@ -147,7 +226,7 @@ inline void outputResults (const std::vector<TestResult>& results,
     }
 
     out
-      << FLIT_FILENAME << ","                        // executable filename
+      << executableFilename << ","                   // executable filename
       << result.nanosecs()                           // nanoseconds
       << std::endl;
   }
@@ -171,12 +250,12 @@ template <typename F>
 long double runComparison_impl(TestFactory* factory, const TestResult &gt,
                                const TestResult &res) {
   auto test = factory->get<F>();
-  if (!res.resultfile().empty()) {
+  if (!gt.resultfile().empty()) {
     assert(res.result().type() == Variant::Type::None);
     assert( gt.result().type() == Variant::Type::None);
     return test->compare(readFile(gt.resultfile()),
                          readFile(res.resultfile()));
-  } else if (res.result().type() == Variant::Type::LongDouble) {
+  } else if (gt.result().type() == Variant::Type::LongDouble) {
     return test->compare(gt.result().longDouble(), res.result().longDouble());
   } else { throw std::runtime_error("Unsupported variant type"); }
 }
@@ -263,13 +342,6 @@ inline int runFlitTests(int argc, char* argv[]) {
 #endif
 
   std::vector<TestResult> results;
-  std::vector<TestResult> groundTruthResults;
-  if (!options.groundTruth.empty()) {
-    std::ifstream gtfile(options.groundTruth);
-    // TODO: only load file contents at time of comparison
-    groundTruthResults = parseResults(gtfile);
-  }
-
   auto testMap = getTests();
   for (auto& testName : options.tests) {
     auto factory = testMap[testName];
@@ -286,7 +358,6 @@ inline int runFlitTests(int argc, char* argv[]) {
           factory, results, test_result_filebase, options.timing,
           options.timingLoops);
     }
-    // TODO: dump string result to file because we might run out of memory
   }
 #if defined(__CUDA__) && !defined(__CPUKERNEL__)
   cudaDeviceSynchronize();
@@ -301,23 +372,57 @@ inline int runFlitTests(int argc, char* argv[]) {
     }
   };
   std::sort(results.begin(), results.end(), testComparator);
-  std::sort(groundTruthResults.begin(), groundTruthResults.end(),
-            testComparator);
 
   // Let's now run the ground-truth comparisons
-  if (groundTruthResults.size() > 0) {
-    for (auto& res : results) {
-      auto factory = testMap[removeIdxFromName(res.name())];
-      // Use binary search to find the first associated ground truth element
-      auto gtIter = std::lower_bound(groundTruthResults.begin(),
-                                     groundTruthResults.end(), res,
-                                     testComparator);
-      // Compare the two results if the element was found
-      if (gtIter != groundTruthResults.end() &&
-          res.name() == (*gtIter).name() &&
-          res.precision() == (*gtIter).precision())
+  if (options.compareMode) {
+    TestResultMap comparisonResults;
+  
+    for (auto fname : options.compareFiles) {
+      comparisonResults.loadfile(fname);
+    }
+
+    // compare mode is only done in the ground truth compilation
+    // so "results" are the ground truth results.
+    for (auto& gtres : results) {
+      auto factory = testMap[removeIdxFromName(gtres.name())];
+      auto toCompare = comparisonResults[{gtres.name(), gtres.precision()}];
+      for (TestResult* compResult : toCompare) {
+        auto compVal = runComparison(factory, gtres, *compResult);
+        compResult->set_comparison(compVal);
+      }
+    }
+
+    // save back to the compare files with compare value set
+    for (auto fname : options.compareFiles) {
+      // read in the metadata to use in creating the file again
+      std::unordered_map<std::string, std::string> metadata;
+      {
+        std::ifstream fin(fname);
+        metadata = parseMetadata(fin);
+      }
+
+      // get all results from this file
+      auto fileresultPtrs = comparisonResults.fileresults(fname);
+      std::vector<TestResult> fileresults;
+      for (auto resultPtr : fileresultPtrs) {
+        fileresults.push_back(*resultPtr);
+      }
+
+      // sort the file results
+      std::sort(fileresults.begin(), fileresults.end(), testComparator);
+
+      // output back to a file
       {
-        res.set_comparison(runComparison(factory, *gtIter, res));
+        std::ofstream fout(fname);
+        outputResults(
+            fileresults,
+            fout,
+            metadata["host"],
+            metadata["compiler"],
+            metadata["optl"],
+            metadata["switches"],
+            metadata["file"]
+            );
       }
     }
   }